Skip to content
This repository has been archived by the owner on Jun 18, 2024. It is now read-only.

scx: Add hotplug sequence number #179

Merged
merged 5 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 61 additions & 18 deletions kernel/sched/ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ struct scx_exit_info {
/* %SCX_EXIT_* - broad category of the exit reason */
enum scx_exit_kind kind;

/* exit code if gracefully exiting from BPF */
/* exit code if gracefully exiting */
s64 exit_code;

/* textual representation of the above */
Expand Down Expand Up @@ -561,6 +561,15 @@ struct sched_ext_ops {
*/
u32 exit_dump_len;

/**
* hotplug_seq - A sequence number that may be set by the scheduler to
* detect when a hotplug event has occurred during the loading process.
* If 0, no detection occurs. Otherwise, the scheduler will fail to
* load if the sequence number does not match @scx_hotplug_seq on the
* enable path.
*/
u64 hotplug_seq;

/**
* name - BPF scheduler's name
*
Expand Down Expand Up @@ -796,6 +805,7 @@ static atomic_t scx_exit_kind = ATOMIC_INIT(SCX_EXIT_DONE);
static struct scx_exit_info *scx_exit_info;

static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);

/*
* The maximum amount of time in jiffies that a task may be runnable without
Expand Down Expand Up @@ -2930,31 +2940,32 @@ void __scx_update_idle(struct rq *rq, bool idle)
#endif
}

static void hotplug_exit_sched(struct rq *rq, bool online)
static void handle_hotplug(struct rq *rq, bool online)
{
scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
"cpu %d going %s, exiting scheduler",
cpu_of(rq), online ? "online" : "offline");
int cpu = cpu_of(rq);

atomic_long_inc(&scx_hotplug_seq);

if (online && SCX_HAS_OP(cpu_online))
SCX_CALL_OP(SCX_KF_REST, cpu_online, cpu);
else if (!online && SCX_HAS_OP(cpu_offline))
SCX_CALL_OP(SCX_KF_REST, cpu_offline, cpu);
else
scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
"cpu %d going %s, exiting scheduler", cpu,
online ? "online" : "offline");
}

static void rq_online_scx(struct rq *rq, enum rq_onoff_reason reason)
{
if (reason == RQ_ONOFF_HOTPLUG) {
if (SCX_HAS_OP(cpu_online))
SCX_CALL_OP(SCX_KF_REST, cpu_online, cpu_of(rq));
else
hotplug_exit_sched(rq, true);
}
if (reason == RQ_ONOFF_HOTPLUG)
handle_hotplug(rq, true);
}

static void rq_offline_scx(struct rq *rq, enum rq_onoff_reason reason)
{
if (reason == RQ_ONOFF_HOTPLUG) {
if (SCX_HAS_OP(cpu_offline))
SCX_CALL_OP(SCX_KF_REST, cpu_offline, cpu_of(rq));
else
hotplug_exit_sched(rq, false);
}
if (reason == RQ_ONOFF_HOTPLUG)
handle_hotplug(rq, false);
}

#else /* !CONFIG_SMP */
Expand Down Expand Up @@ -3811,10 +3822,18 @@ static ssize_t scx_attr_nr_rejected_show(struct kobject *kobj,
}
SCX_ATTR(nr_rejected);

/*
 * sysfs show handler for the hotplug_seq attribute: writes the current value
 * of the global scx_hotplug_seq counter into @buf as a decimal number followed
 * by a newline, and returns whatever sysfs_emit() returns.
 */
static ssize_t scx_attr_hotplug_seq_show(struct kobject *kobj,
					 struct kobj_attribute *ka, char *buf)
{
	long seq = atomic_long_read(&scx_hotplug_seq);

	return sysfs_emit(buf, "%ld\n", seq);
}
SCX_ATTR(hotplug_seq);

/*
 * NULL-terminated table of the global sched_ext attributes: state, switch_all,
 * nr_rejected and hotplug_seq. The trailing NULL entry marks the end of the
 * list.
 */
static struct attribute *scx_global_attrs[] = {
&scx_attr_state.attr,
&scx_attr_switch_all.attr,
&scx_attr_nr_rejected.attr,
&scx_attr_hotplug_seq.attr,
NULL,
};

Expand Down Expand Up @@ -4336,6 +4355,25 @@ static struct kthread_worker *scx_create_rt_helper(const char *name)
return helper;
}

/*
 * Compare the hotplug sequence number carried in @ops against the global
 * scx_hotplug_seq counter.
 *
 * A zero @ops->hotplug_seq opts out of the check. Otherwise a mismatch means a
 * hotplug event occurred between when the scheduler was initialized and when
 * it got to attach, so an exit with the restart / hotplug exit code is
 * requested to notify user space.
 */
static void check_hotplug_seq(const struct sched_ext_ops *ops)
{
	unsigned long long expected = ops->hotplug_seq;
	unsigned long long actual;

	/* Detection was not requested by the scheduler. */
	if (!expected)
		return;

	actual = atomic_long_read(&scx_hotplug_seq);
	if (expected == actual)
		return;

	scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
		     "expected hotplug seq %llu did not match actual %llu",
		     expected, actual);
}

static int validate_ops(const struct sched_ext_ops *ops)
{
/*
Expand Down Expand Up @@ -4469,6 +4507,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
cpus_read_lock();
scx_cgroup_lock();

check_hotplug_seq(ops);

for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
if (((void (**)(void))ops)[i])
static_branch_enable_cpuslocked(&scx_has_op[i]);
Expand Down Expand Up @@ -4796,6 +4836,9 @@ static int bpf_scx_init_member(const struct btf_type *t,
ops->exit_dump_len =
*(u32 *)(udata + moff) ?: SCX_EXIT_DUMP_DFL_LEN;
return 1;
case offsetof(struct sched_ext_ops, hotplug_seq):
ops->hotplug_seq = *(u64 *)(udata + moff);
return 1;
}

return 0;
Expand Down Expand Up @@ -6068,7 +6111,7 @@ static int __init scx_init(void)

scx_kset = kset_create_and_add("sched_ext", &scx_uevent_ops, kernel_kobj);
if (!scx_kset) {
pr_err("sched_ext: Failed to create /sys/sched_ext\n");
pr_err("sched_ext: Failed to create /sys/kernel/sched_ext\n");
return -ENOMEM;
}

Expand Down
48 changes: 45 additions & 3 deletions tools/sched_ext/include/scx/compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#define __SCX_COMPAT_H

#include <bpf/btf.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

struct btf *__COMPAT_vmlinux_btf __attribute__((weak));

Expand Down Expand Up @@ -106,16 +109,55 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
#define __COMPAT_SCX_OPS_SWITCH_PARTIAL \
__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")

/*
 * Read the current hotplug sequence number from
 * /sys/kernel/sched_ext/hotplug_seq.
 *
 * Returns -ENOENT if the sysfs file cannot be opened. Otherwise returns the
 * non-negative value parsed from the file. A failed or empty read, or contents
 * that do not start with a non-negative decimal number, trip SCX_BUG_ON().
 */
static inline long scx_hotplug_seq(void)
{
	int fd;
	char buf[32];
	char *endptr;
	ssize_t len;
	long val;

	fd = open("/sys/kernel/sched_ext/hotplug_seq", O_RDONLY);
	if (fd < 0)
		return -ENOENT;

	/* Leave room for the NUL terminator appended below. */
	len = read(fd, buf, sizeof(buf) - 1);
	SCX_BUG_ON(len <= 0, "read failed (%zd)", len);
	buf[len] = 0;
	close(fd);

	/*
	 * The value is parsed as a signed long so the type matches both the
	 * variable and the format specifier. endptr == buf means no digits
	 * were consumed, which would otherwise be silently read as 0.
	 */
	val = strtol(buf, &endptr, 10);
	SCX_BUG_ON(endptr == buf || val < 0,
		   "invalid num hotplug events: %ld", val);

	return val;
}

/*
* struct sched_ext_ops can change over time. If compat.bpf.h::SCX_OPS_DEFINE()
* is used to define ops and compat.h::SCX_OPS_LOAD/ATTACH() are used to load
* and attach it, backward compatibility is automatically maintained where
* reasonable.
*
* - sched_ext_ops.exit_dump_len was added later. On kernels which don't support
* it, the value is ignored and a warning is triggered if the value is
* requested to be non-zero.
* The following values were added in newer kernels:
*
* - sched_ext_ops.exit_dump_len
* o If nonzero and running on an older kernel, the value is set to zero
* and a warning is emitted
*
* - sched_ext_ops.hotplug_seq
* o If nonzero and running on an older kernel, the scheduler will fail to
* load
*/
/*
 * SCX_OPS_OPEN - open the skeleton for scheduler @__scx_name
 *
 * Calls <__scx_name>__open() and trips SCX_BUG_ON() if it returns NULL. When
 * the "sched_ext_ops" struct has a "hotplug_seq" field according to
 * __COMPAT_struct_has_field(), the hotplug_seq member of the @__ops_name
 * struct_ops is set to the value returned by scx_hotplug_seq(). The statement
 * expression evaluates to the skeleton pointer.
 *
 * NOTE(review): scx_hotplug_seq() can return -ENOENT, which would be stored
 * here as-is; confirm that cannot happen once the field check has passed.
 */
#define SCX_OPS_OPEN(__ops_name, __scx_name) ({ \
struct __scx_name *__skel; \
\
__skel = __scx_name##__open(); \
SCX_BUG_ON(!__skel, "Could not open " #__scx_name); \
\
if (__COMPAT_struct_has_field("sched_ext_ops", "hotplug_seq")) \
__skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq(); \
__skel; \
})

#define SCX_OPS_LOAD(__skel, __ops_name, __scx_name, __uei_name) ({ \
UEI_SET_SIZE(__skel, __ops_name, __uei_name); \
if (__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") && \
Expand Down
59 changes: 47 additions & 12 deletions tools/testing/selftests/sched_ext/hotplug.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
#include "scx_test.h"
#include "util.h"

struct hotplug *skel;

const char *online_path = "/sys/devices/system/cpu/cpu1/online";

static bool is_cpu_online(void)
Expand All @@ -40,22 +38,20 @@ static enum scx_test_status setup(void **ctx)
if (!is_cpu_online())
return SCX_TEST_SKIP;

skel = hotplug__open_and_load();
if (!skel) {
SCX_ERR("Failed to open and load hotplug skel");
return SCX_TEST_FAIL;
}

return SCX_TEST_PASS;
}

static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined)
{
struct hotplug *skel;
struct bpf_link *link;
long kind, code;

SCX_ASSERT(is_cpu_online());

skel = hotplug__open_and_load();
SCX_ASSERT(skel);

/* Testing the offline -> online path, so go offline before starting */
if (onlining)
toggle_online_status(0);
Expand All @@ -78,6 +74,7 @@ static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined)

if (!link) {
SCX_ERR("Failed to attach scheduler");
hotplug__destroy(skel);
return SCX_TEST_FAIL;
}

Expand All @@ -93,12 +90,51 @@ static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined)
toggle_online_status(1);

bpf_link__destroy(link);

UEI_RESET(skel, uei);
hotplug__destroy(skel);

return SCX_TEST_PASS;
}

/*
 * Check that a hotplug event between opening the scheduler and attaching it
 * is detected through the hotplug sequence number.
 *
 * The skeleton is opened with SCX_OPS_OPEN() and loaded, a CPU is then taken
 * offline before attaching, and the test waits for the scheduler to report an
 * exit whose kind and exit code match SCX_EXIT_UNREG_KERN and
 * SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG.
 *
 * Returns SCX_TEST_PASS when this function runs to completion; status is
 * never assigned any other value here.
 */
static enum scx_test_status test_hotplug_attach(void)
{
struct hotplug *skel;
struct bpf_link *link;
enum scx_test_status status = SCX_TEST_PASS;
long kind, code;

/* Preconditions: the CPU is online and a hotplug seq value is readable */
SCX_ASSERT(is_cpu_online());
SCX_ASSERT(scx_hotplug_seq() > 0);

skel = SCX_OPS_OPEN(hotplug_nocb_ops, hotplug);
SCX_ASSERT(skel);

SCX_OPS_LOAD(skel, hotplug_nocb_ops, hotplug, uei);

/*
 * Take the CPU offline to increment the global hotplug seq, which
 * should cause attach to fail due to us setting the hotplug seq above
 */
toggle_online_status(0);
link = bpf_map__attach_struct_ops(skel->maps.hotplug_nocb_ops);

/* Bring the CPU back online before checking any results */
toggle_online_status(1);

/*
 * NOTE(review): a non-NULL link is asserted here, so the failure is
 * observed through the exit info below rather than through the attach
 * call's return value.
 */
SCX_ASSERT(link);
while (!UEI_EXITED(skel, uei))
sched_yield();

/* Expected exit kind and exit code for the seq mismatch */
kind = SCX_KIND_VAL(SCX_EXIT_UNREG_KERN);
code = SCX_ECODE_VAL(SCX_ECODE_ACT_RESTART) |
SCX_ECODE_VAL(SCX_ECODE_RSN_HOTPLUG);
SCX_EQ(UEI_KIND(skel, uei), kind);
SCX_EQ(UEI_ECODE(skel, uei), code);

bpf_link__destroy(link);
hotplug__destroy(skel);

return status;
}

static enum scx_test_status run(void *ctx)
{

Expand All @@ -114,12 +150,11 @@ static enum scx_test_status run(void *ctx)

#undef HP_TEST

return SCX_TEST_PASS;
return test_hotplug_attach();
}

static void cleanup(void *ctx)
{
hotplug__destroy(skel);
toggle_online_status(1);
}

Expand Down
2 changes: 1 addition & 1 deletion tools/testing/selftests/sched_ext/util.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
* Copyright (c) 2024 Tejun Heo <[email protected]>
* Copyright (c) 2024 David Vernet <[email protected]>
*/

#ifndef __SCX_TEST_UTIL_H__
Expand Down