#!/usr/bin/python
# Provenance: virtiostat.py (executable file; 289 lines, 246 loc, 8.75 KB),
# copied from a fork of iovisor/bcc. The web-page chrome and line-number
# gutter that preceded the script were scrape residue, reduced to this note.
# @lint-avoid-python-3-compatibility-imports
#
# virtiostat Show virtio devices input/output statistics.
# For Linux, uses BCC, eBPF.
#
# USAGE: virtiostat [-h] [-T] [-D] [-d DRIVER] [-n DEVNAME] [INTERVAL] [COUNT]
#
# Copyright (c) 2021 zhenwei pi
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 13-Feb-2021 zhenwei pi Created this.
from __future__ import print_function
from bcc import BPF
from time import sleep, strftime
import argparse
# arguments
# Epilog shown verbatim under --help (RawDescriptionHelpFormatter keeps the
# line breaks).
examples = """examples:
./virtiostat # print 3(default) second summaries
./virtiostat 1 10 # print 1 second summaries, 10 times
./virtiostat -T # show timestamps
./virtiostat -d virtio_blk # only show virtio block devices
./virtiostat -n virtio0 # only show virtio0 device
./virtiostat -D # show debug bpf text
"""
parser = argparse.ArgumentParser(
    epilog=examples,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="Show virtio devices input/output statistics")

# Optional positionals: values given on the command line arrive as strings,
# the defaults below stay ints.
for _dest, _fallback, _summary in (
        ("interval", 3, "output interval, in seconds"),
        ("count", 99999999, "number of outputs")):
    parser.add_argument(_dest, nargs="?", default=_fallback, help=_summary)

# Options are registered in the order they should appear in --help.
_switch = {"action": "store_true"}
parser.add_argument("-T", "--timestamp",
                    help="show timestamp on output", **_switch)
for _short, _long, _summary in (
        ("-d", "--driver", "filter for driver name"),
        ("-n", "--devname", "filter for device name")):
    parser.add_argument(_short, _long, help=_summary)
parser.add_argument(
    "-D", "--debug",
    help="print BPF program before starting (for debugging purposes)",
    **_switch)
# --ebpf is kept out of the help output on purpose.
parser.add_argument("--ebpf", help=argparse.SUPPRESS, **_switch)

args = parser.parse_args()
# define BPF program
#
# C source handed to BPF(text=...) further down. Overview of what it contains:
#   * local_strcmp(): bounded string compare (at most CMPMAX characters) used
#     by the optional name filters.
#   * stats: a BPF hash keyed by the virtqueue pointer cast to u64, holding
#     one virtio_stat_t (driver, device and vq names plus SG/byte counters).
#   * count_len(): sums scatterlist entry lengths, visiting at most
#     VIRTIO_MAX_SGS lists and at most SG_MAX entries per list.
#   * record(): creates or updates the stats entry for a virtqueue.
#   * trace_virtqueue_add_*(): the four probe entry points; each one just
#     forwards to record(). The single-buffer variants pass &sg with fixed
#     out/in counts of 1/0 or 0/1.
# DRIVERFILTER and DEVNAMEFILTER are placeholders inside record(); the Python
# code below replaces them with a filter snippet or with an empty string.
# Everything between the triple quotes is runtime text and must not be edited
# as if it were Python.
bpf_text = """
#ifndef KBUILD_MODNAME
#define KBUILD_MODNAME "bcc"
#endif
#include <linux/virtio.h>
#include <bcc/proto.h>
/* typically virtio scsi has max SGs of 6 */
#define VIRTIO_MAX_SGS 6
/* typically virtio blk has max SEG of 128 */
#define SG_MAX 128
/* local strcmp function, max length 16 to protect instruction loops */
#define CMPMAX 16
static int local_strcmp(const char *cs, const char *ct)
{
int len = 0;
unsigned char c1, c2;
while (len++ < CMPMAX) {
c1 = *cs++;
c2 = *ct++;
if (c1 != c2)
return c1 < c2 ? -1 : 1;
if (!c1)
break;
}
return 0;
}
typedef struct virtio_stat {
char driver[16];
char dev[12];
char vqname[12];
u32 in_sgs;
u32 out_sgs;
u64 in_bw;
u64 out_bw;
} virtio_stat_t;
BPF_HASH(stats, u64, virtio_stat_t);
static struct scatterlist *__sg_next(struct scatterlist *sgp)
{
struct scatterlist sg;
bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
if (sg_is_last(&sg))
return NULL;
sgp++;
bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
if (unlikely(sg_is_chain(&sg)))
sgp = sg_chain_ptr(&sg);
return sgp;
}
static u64 count_len(struct scatterlist **sgs, unsigned int num)
{
u64 length = 0;
unsigned int i, n;
struct scatterlist *sgp = NULL;
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
for (n = 0, sgp = sgs[i]; sgp && (n < SG_MAX); sgp = __sg_next(sgp)) {
length += sgp->length;
n++;
}
/* Suggested by Yonghong Song:
* IndVarSimplifyPass with clang 12 may cause verifier failure:
* ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) { // Line 60
* 90: 15 08 15 00 00 00 00 00 if r8 == 0 goto +21
* 91: bf 81 00 00 00 00 00 00 r1 = r8
* 92: 07 01 00 00 ff ff ff ff r1 += -1
* 93: 67 01 00 00 20 00 00 00 r1 <<= 32
* 94: 77 01 00 00 20 00 00 00 r1 >>= 32
* 95: b7 02 00 00 05 00 00 00 r2 = 5
* 96: 2d 12 01 00 00 00 00 00 if r2 > r1 goto +1
* 97: b7 08 00 00 06 00 00 00 r8 = 6
* 98: b7 02 00 00 00 00 00 00 r2 = 0
* 99: b7 09 00 00 00 00 00 00 r9 = 0
* 100: 7b 8a 68 ff 00 00 00 00 *(u64 *)(r10 - 152) = r8
* 101: 05 00 35 00 00 00 00 00 goto +53
* Note that r1 is refined by r8 is saved to stack for later use.
* This will give verifier u64_max loop bound and eventually cause
* verification failure. Workaround with the below asm code.
*/
#if __clang_major__ >= 7
asm volatile("" : "=r"(i) : "0"(i));
#endif
}
return length;
}
static void record(struct virtqueue *vq, struct scatterlist **sgs,
unsigned int out_sgs, unsigned int in_sgs)
{
virtio_stat_t newvs = {0};
virtio_stat_t *vs;
u64 key = (u64)vq;
u64 in_bw = 0;
DRIVERFILTER
DEVNAMEFILTER
/* Workaround: separate two count_len() calls, one here and the
* other below. Otherwise, compiler may generate some spills which
* harms verifier pruning. This happens in llvm12, but not llvm4.
* Below code works on both cases.
*/
if (in_sgs)
in_bw = count_len(sgs + out_sgs, in_sgs);
vs = stats.lookup(&key);
if (!vs) {
bpf_probe_read_kernel_str(newvs.driver, sizeof(newvs.driver), vq->vdev->dev.driver->name);
bpf_probe_read_kernel_str(newvs.dev, sizeof(newvs.dev), vq->vdev->dev.kobj.name);
bpf_probe_read_kernel_str(newvs.vqname, sizeof(newvs.vqname), vq->name);
newvs.out_sgs = out_sgs;
newvs.in_sgs = in_sgs;
if (out_sgs)
newvs.out_bw = count_len(sgs, out_sgs);
newvs.in_bw = in_bw;
stats.update(&key, &newvs);
} else {
vs->out_sgs += out_sgs;
vs->in_sgs += in_sgs;
if (out_sgs)
vs->out_bw += count_len(sgs, out_sgs);
vs->in_bw += in_bw;
}
}
int trace_virtqueue_add_sgs(struct pt_regs *ctx, struct virtqueue *vq,
struct scatterlist **sgs, unsigned int out_sgs,
unsigned int in_sgs, void *data, gfp_t gfp)
{
record(vq, sgs, out_sgs, in_sgs);
return 0;
}
int trace_virtqueue_add_outbuf(struct pt_regs *ctx, struct virtqueue *vq,
struct scatterlist *sg, unsigned int num,
void *data, gfp_t gfp)
{
record(vq, &sg, 1, 0);
return 0;
}
int trace_virtqueue_add_inbuf(struct pt_regs *ctx, struct virtqueue *vq,
struct scatterlist *sg, unsigned int num,
void *data, gfp_t gfp)
{
record(vq, &sg, 0, 1);
return 0;
}
int trace_virtqueue_add_inbuf_ctx(struct pt_regs *ctx, struct virtqueue *vq,
struct scatterlist *sg, unsigned int num,
void *data, void *_ctx, gfp_t gfp)
{
record(vq, &sg, 0, 1);
return 0;
}
"""
# filter for driver name
# Build the C snippet first (empty when no -d was given), then drop it into
# the DRIVERFILTER placeholder inside record().
driver_filter = ''
if args.driver:
    driver_filter = """char filter_driver[] = \"%s\";
char driver[16];
bpf_probe_read_kernel_str(driver, sizeof(driver), vq->vdev->dev.driver->name);
if (local_strcmp(filter_driver, driver))
return;""" % (args.driver)
bpf_text = bpf_text.replace('DRIVERFILTER', driver_filter)

# filter for dev name
# Same scheme for -n and the DEVNAMEFILTER placeholder.
devname_filter = ''
if args.devname:
    devname_filter = """char filter_devname[] = \"%s\";
char devname[16];
bpf_probe_read_kernel_str(devname, sizeof(devname), vq->vdev->dev.kobj.name);
if (local_strcmp(filter_devname, devname))
return;""" % (args.devname)
bpf_text = bpf_text.replace('DEVNAMEFILTER', devname_filter)
# debug mode (-D) and dump mode (--ebpf) both print the generated BPF text.
# Check the flags together so the program is printed once, not twice, when
# both are given.
if args.debug or args.ebpf:
    print(bpf_text)
# dump mode: stop here, before the program is compiled and loaded
if args.ebpf:
    exit()
# load BPF program
b = BPF(text=bpf_text)

# Kernel function -> handler defined in bpf_text, attached in this order.
kprobe_handlers = (
    ("virtqueue_add_sgs", "trace_virtqueue_add_sgs"),
    ("virtqueue_add_outbuf", "trace_virtqueue_add_outbuf"),
    ("virtqueue_add_inbuf", "trace_virtqueue_add_inbuf"),
    ("virtqueue_add_inbuf_ctx", "trace_virtqueue_add_inbuf_ctx"),
)
for kernel_fn, handler in kprobe_handlers:
    b.attach_kprobe(event=kernel_fn, fn_name=handler)

print("Tracing virtio devices statistics ... Hit Ctrl-C to end.")
# start main loop
def _as_text(raw):
    """Return a map string field as text suitable for "%s" formatting.

    The char[] members of virtio_stat_t are read back as bytes on Python 3;
    formatting bytes with "%s" would print the b'...' repr instead of the
    name. Values that are not bytes are returned unchanged.
    """
    if isinstance(raw, bytes):
        return raw.decode("utf-8", "replace")
    return raw


# Positionals given on the command line arrive as strings; convert them once.
interval = int(args.interval)
count = int(args.count)

# An interval of 0 means "print a single summary and quit". Testing the
# converted number matters: the string "0" is truthy and would otherwise
# spin forever in a zero-second sleep.
exiting = 0 if interval else 1

# "count" is the number of summaries to print, so count summaries rather
# than elapsed seconds (which only agree when the interval is 1).
outputs = 0
while True:
    try:
        sleep(interval)
    except KeyboardInterrupt:
        # Ctrl-C: still print what was collected so far, then leave.
        exiting = 1
    outputs += 1

    if args.timestamp:
        print("%-8s\n" % strftime("%H:%M:%S"), end="")
    else:
        print("--------", end="\n")
    print("%14s %8s %10s %7s %7s %14s %14s" % ("Driver", "Device", "VQ Name", "In SGs", "Out SGs", "In BW", "Out BW"))

    stats = b.get_table("stats")
    # One row per virtqueue, ordered by device name.
    for _, v in sorted(stats.items(), key=lambda vs: vs[1].dev):
        print("%14s %8s %10s %7d %7d %14d %14d" % (
            _as_text(v.driver), _as_text(v.dev), _as_text(v.vqname),
            v.in_sgs, v.out_sgs, v.in_bw, v.out_bw))
    # Reset the counters so each summary covers one interval only.
    stats.clear()

    if exiting or outputs >= count:
        exit()