Skip to content

Commit

Permalink
ghost: add bpf_select_rq
Browse files Browse the repository at this point in the history
Examples of how to use it are in biff.bpf.c and flux_api.bpf.c.  Flux
retains its existing wake-on-waker behavior, with TTWU_QUEUE.  Biff
does wake-on-wakee, without TTWU_QUEUE.

PiperOrigin-RevId: 507578851
  • Loading branch information
Barret Rhoden authored and dohyunkim-dev committed Mar 23, 2023
1 parent 6078146 commit d4ddad4
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 6 deletions.
1 change: 1 addition & 0 deletions bpf/user/agent.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ static int insert_prog(int ctl_fd, struct bpf_program *prog)
switch (eat & 0xFFFF) {
case BPF_GHOST_SCHED_PNT:
case BPF_GHOST_MSG_SEND:
case BPF_GHOST_SELECT_RQ:
ret = bpf_link_create(prog_fd, ctl_fd, eat, NULL);
break;
default:
Expand Down
2 changes: 2 additions & 0 deletions bpf/user/agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@ extern "C" {
enum {
	/* ghOSt BPF program types, numbered from 1000. */
	BPF_PROG_TYPE_GHOST_SCHED = 1000,
	BPF_PROG_TYPE_GHOST_MSG = 1001,
	BPF_PROG_TYPE_GHOST_SELECT_RQ = 1002,

	/* ghOSt BPF attach types, numbered from 2000. */
	BPF_GHOST_SCHED_PNT = 2000,
	BPF_GHOST_MSG_SEND = 2001,
	BPF_GHOST_SELECT_RQ = 2002,
	/* Sentinel: one past the last ghOSt attach type. */
	__MAX_BPF_GHOST_ATTACH_TYPE = 2003
};

Expand Down
2 changes: 1 addition & 1 deletion kernel/ghost_uapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
* process are the same version as each other. Each successive version changes
* values in this header file, assumptions about operations in the kernel, etc.
*/
#define GHOST_VERSION 82
#define GHOST_VERSION 83

/*
* Define SCHED_GHOST via the ghost uapi unless it has already been defined
Expand Down
19 changes: 19 additions & 0 deletions kernel/vmlinux_ghost_5_11.h
Original file line number Diff line number Diff line change
Expand Up @@ -8790,6 +8790,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_LOOKUP = 30,
BPF_PROG_TYPE_GHOST_SCHED = 1000,
BPF_PROG_TYPE_GHOST_MSG = 1001,
BPF_PROG_TYPE_GHOST_SELECT_RQ = 1002,
};

enum bpf_attach_type {
Expand Down Expand Up @@ -8834,6 +8835,13 @@ enum bpf_attach_type {
__MAX_BPF_ATTACH_TYPE = 38,
};

/*
 * ghOSt BPF attach types.  They live in their own anonymous enum, numbered
 * from 2000, rather than in enum bpf_attach_type.
 * __MAX_BPF_GHOST_ATTACH_TYPE is one past the last valid attach type.
 */
enum {
BPF_GHOST_SCHED_PNT = 2000,
BPF_GHOST_MSG_SEND = 2001,
BPF_GHOST_SELECT_RQ = 2002,
__MAX_BPF_GHOST_ATTACH_TYPE = 2003,
};

struct sock_filter {
__u16 code;
__u8 jt;
Expand Down Expand Up @@ -33242,6 +33250,15 @@ struct bpf_ghost_sched {
__u64 next_gtid;
};

/*
 * Context handed to BPF_PROG_TYPE_GHOST_SELECT_RQ programs.  The program's
 * return value is the chosen cpu.
 *
 * NOTE(review): field meanings below are inferred from how the example
 * programs use them; confirm against the kernel side.
 */
struct bpf_ghost_select_rq {
/* gtid of the task this call is about; presumably the task being woken. */
__u64 gtid;
/* Cpu associated with the task itself; returning it gives wake-on-wakee. */
__u32 task_cpu;
/* Cpu of the waker; returning it gives wake-on-waker. */
__u32 waker_cpu;
/* Presumably the sd_flag argument of the kernel's select_task_rq - TODO confirm. */
__u32 sd_flag;
/* Presumably the wake_flags argument of the kernel's select_task_rq - TODO confirm. */
__u32 wake_flags;
/* Written by the program: set to true to request that TTWU_QUEUE be skipped. */
__u8 skip_ttwu_queue;
};

enum bpf_func_id {
BPF_FUNC_unspec = 0,
BPF_FUNC_map_lookup_elem = 1,
Expand Down Expand Up @@ -42514,6 +42531,8 @@ struct bpf_ctx_convert {
struct bpf_ghost_sched BPF_PROG_TYPE_GHOST_SCHED_kern;
struct bpf_ghost_msg BPF_PROG_TYPE_GHOST_MSG_prog;
struct bpf_ghost_msg BPF_PROG_TYPE_GHOST_MSG_kern;
struct bpf_ghost_select_rq BPF_PROG_TYPE_GHOST_SELECT_RQ_prog;
struct bpf_ghost_select_rq BPF_PROG_TYPE_GHOST_SELECT_RQ_kern;
};

struct bpf_flow_keys {
Expand Down
7 changes: 4 additions & 3 deletions schedulers/biff/biff_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ BiffScheduler::BiffScheduler(Enclave* enclave, CpuList cpulist,
BPF_PROG_TYPE_GHOST_SCHED, BPF_GHOST_SCHED_PNT);
bpf_program__set_types(bpf_obj_->progs.biff_msg_send, BPF_PROG_TYPE_GHOST_MSG,
BPF_GHOST_MSG_SEND);
bpf_program__set_types(bpf_obj_->progs.biff_select_rq,
BPF_PROG_TYPE_GHOST_SELECT_RQ, BPF_GHOST_SELECT_RQ);

bpf_obj_->rodata->enable_bpf_printd = CapHas(CAP_PERFMON);
SetBpfTopologyVars(bpf_obj_->rodata, MachineTopology());
Expand All @@ -34,6 +36,8 @@ BiffScheduler::BiffScheduler(Enclave* enclave, CpuList cpulist,
0);
CHECK_EQ(agent_bpf_register(bpf_obj_->progs.biff_msg_send,
BPF_GHOST_MSG_SEND), 0);
CHECK_EQ(agent_bpf_register(bpf_obj_->progs.biff_select_rq,
BPF_GHOST_SELECT_RQ), 0);

bpf_cpu_data_ = static_cast<struct biff_bpf_cpu_data*>(
bpf_map__mmap(bpf_obj_->maps.cpu_data));
Expand All @@ -51,9 +55,6 @@ BiffScheduler::~BiffScheduler() {
}

void BiffScheduler::EnclaveReady() {
// Biff has no cpu locality, so the remote wakeup is never worth it.
enclave()->SetWakeOnWakerCpu(true);

enclave()->SetDeliverTicks(true);
enclave()->SetDeliverCpuAvailability(true);
WRITE_ONCE(bpf_obj_->bss->initialized, true);
Expand Down
6 changes: 4 additions & 2 deletions schedulers/flux/flux_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ FluxScheduler::FluxScheduler(Enclave* enclave, CpuList cpulist,
BPF_PROG_TYPE_GHOST_SCHED, BPF_GHOST_SCHED_PNT);
bpf_program__set_types(bpf_obj_->progs.flux_msg_send, BPF_PROG_TYPE_GHOST_MSG,
BPF_GHOST_MSG_SEND);
bpf_program__set_types(bpf_obj_->progs.flux_select_rq,
BPF_PROG_TYPE_GHOST_SELECT_RQ, BPF_GHOST_SELECT_RQ);

bpf_obj_->rodata->enable_bpf_printd = CapHas(CAP_PERFMON);

Expand All @@ -51,6 +53,8 @@ FluxScheduler::FluxScheduler(Enclave* enclave, CpuList cpulist,
0);
CHECK_EQ(agent_bpf_register(bpf_obj_->progs.flux_msg_send,
BPF_GHOST_MSG_SEND), 0);
CHECK_EQ(agent_bpf_register(bpf_obj_->progs.flux_select_rq,
BPF_GHOST_SELECT_RQ), 0);

cpu_data_ = static_cast<flux_cpu*>(
bpf_map__mmap(bpf_obj_->maps.cpu_data));
Expand Down Expand Up @@ -80,8 +84,6 @@ FluxScheduler::~FluxScheduler() {
}

void FluxScheduler::EnclaveReady() {
enclave()->SetWakeOnWakerCpu(true);

enclave()->SetDeliverTicks(true);
enclave()->SetDeliverCpuAvailability(true);
// We learn about cpu availability via a message. Some cpus may currently be
Expand Down
24 changes: 24 additions & 0 deletions third_party/bpf/biff.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -656,4 +656,28 @@ int biff_msg_send(struct bpf_ghost_msg *msg)
return 1;
}

/*
 * select_rq hook for biff: pick ctx->task_cpu as the cpu for the task and
 * request that TTWU_QUEUE be skipped.
 *
 * Returns ctx->task_cpu, or -1 when gtid_to_swd() has no entry for
 * ctx->gtid.
 * NOTE(review): how the kernel handles the -1 return is not visible here;
 * presumably it falls back to its own choice - confirm.
 */
SEC("ghost_select_rq/select_rq")
int biff_select_rq(struct bpf_ghost_select_rq *ctx)
{
u64 gtid = ctx->gtid;
/*
 * The verifier rejects passing ctx->gtid to gtid_to_swd() directly, so
 * it is copied into a local first.
 */
struct biff_bpf_sw_data *t = gtid_to_swd(gtid);

/* The lookup result is only used as a "do we know this task?" check. */
if (!t) {
bpf_printd("Got select_rq without a task!");
return -1;
}

/*
 * POLICY
 *
 * Not necessarily a good policy.  The combination of skipping
 * TTWU_QUEUE and picking the task_cpu will grab remote cpus' RQ locks
 * for remote wakeups.  This is just an example of what you can do.
 */
ctx->skip_ttwu_queue = true;

return ctx->task_cpu;
}

char LICENSE[] SEC("license") = "GPL";
6 changes: 6 additions & 0 deletions third_party/bpf/flux_api.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -806,4 +806,10 @@ int flux_msg_send(struct bpf_ghost_msg *msg)
return 1;
}

/*
 * select_rq hook for flux: always pick the waker's cpu (wake-on-waker).
 * ctx->skip_ttwu_queue is left untouched.
 */
SEC("ghost_select_rq/select_rq")
int flux_select_rq(struct bpf_ghost_select_rq *ctx)
{
	int chosen_cpu = ctx->waker_cpu;

	return chosen_cpu;
}

char LICENSE[] SEC("license") = "GPL";

0 comments on commit d4ddad4

Please sign in to comment.