diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 87ab3a8adeacd..556daeda36909 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -26,7 +26,7 @@ struct scx_exit_info {
 	/* %SCX_EXIT_* - broad category of the exit reason */
 	enum scx_exit_kind	kind;
 
-	/* exit code if gracefully exiting from BPF */
+	/* exit code if gracefully exiting */
 	s64			exit_code;
 
 	/* textual representation of the above */
@@ -561,6 +561,15 @@ struct sched_ext_ops {
 	 */
 	u32 exit_dump_len;
 
+	/**
+	 * hotplug_seq - A sequence number that may be set by the scheduler to
+	 * detect when a hotplug event has occurred during the loading process.
+	 * If 0, no detection occurs. Otherwise, the scheduler will fail to
+	 * load if the sequence number does not match @scx_hotplug_seq on the
+	 * enable path.
+	 */
+	u64 hotplug_seq;
+
 	/**
 	 * name - BPF scheduler's name
 	 *
@@ -796,6 +805,7 @@ static atomic_t scx_exit_kind = ATOMIC_INIT(SCX_EXIT_DONE);
 static struct scx_exit_info *scx_exit_info;
 
 static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
+static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
 
 /*
  * The maximum amount of time in jiffies that a task may be runnable without
@@ -2930,31 +2940,32 @@ void __scx_update_idle(struct rq *rq, bool idle)
 #endif
 }
 
-static void hotplug_exit_sched(struct rq *rq, bool online)
+static void handle_hotplug(struct rq *rq, bool online)
 {
-	scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
-		     "cpu %d going %s, exiting scheduler",
-		     cpu_of(rq), online ? "online" : "offline");
+	int cpu = cpu_of(rq);
+
+	atomic_long_inc(&scx_hotplug_seq);
+
+	if (online && SCX_HAS_OP(cpu_online))
+		SCX_CALL_OP(SCX_KF_REST, cpu_online, cpu);
+	else if (!online && SCX_HAS_OP(cpu_offline))
+		SCX_CALL_OP(SCX_KF_REST, cpu_offline, cpu);
+	else
+		scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
+			     "cpu %d going %s, exiting scheduler", cpu,
+			     online ? "online" : "offline");
 }
 
 static void rq_online_scx(struct rq *rq, enum rq_onoff_reason reason)
 {
-	if (reason == RQ_ONOFF_HOTPLUG) {
-		if (SCX_HAS_OP(cpu_online))
-			SCX_CALL_OP(SCX_KF_REST, cpu_online, cpu_of(rq));
-		else
-			hotplug_exit_sched(rq, true);
-	}
+	if (reason == RQ_ONOFF_HOTPLUG)
+		handle_hotplug(rq, true);
 }
 
 static void rq_offline_scx(struct rq *rq, enum rq_onoff_reason reason)
 {
-	if (reason == RQ_ONOFF_HOTPLUG) {
-		if (SCX_HAS_OP(cpu_offline))
-			SCX_CALL_OP(SCX_KF_REST, cpu_offline, cpu_of(rq));
-		else
-			hotplug_exit_sched(rq, false);
-	}
+	if (reason == RQ_ONOFF_HOTPLUG)
+		handle_hotplug(rq, false);
 }
 
 #else	/* !CONFIG_SMP */
@@ -3811,10 +3822,18 @@ static ssize_t scx_attr_nr_rejected_show(struct kobject *kobj,
 }
 SCX_ATTR(nr_rejected);
 
+static ssize_t scx_attr_hotplug_seq_show(struct kobject *kobj,
+					 struct kobj_attribute *ka, char *buf)
+{
+	return sysfs_emit(buf, "%ld\n", atomic_long_read(&scx_hotplug_seq));
+}
+SCX_ATTR(hotplug_seq);
+
 static struct attribute *scx_global_attrs[] = {
 	&scx_attr_state.attr,
 	&scx_attr_switch_all.attr,
 	&scx_attr_nr_rejected.attr,
+	&scx_attr_hotplug_seq.attr,
 	NULL,
 };
 
@@ -4336,6 +4355,25 @@ static struct kthread_worker *scx_create_rt_helper(const char *name)
 	return helper;
 }
 
+static void check_hotplug_seq(const struct sched_ext_ops *ops)
+{
+	unsigned long long global_hotplug_seq;
+
+	/*
+	 * If a hotplug event has occurred between when a scheduler was
+	 * initialized and when we were able to attach, exit and notify user
+	 * space about it.
+	 */
+	if (ops->hotplug_seq) {
+		global_hotplug_seq = atomic_long_read(&scx_hotplug_seq);
+		if (ops->hotplug_seq != global_hotplug_seq) {
+			scx_ops_exit(SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG,
+				     "expected hotplug seq %llu did not match actual %llu",
+				     ops->hotplug_seq, global_hotplug_seq);
+		}
+	}
+}
+
 static int validate_ops(const struct sched_ext_ops *ops)
 {
 	/*
@@ -4469,6 +4507,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 	cpus_read_lock();
 	scx_cgroup_lock();
 
+	check_hotplug_seq(ops);
+
 	for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
 		if (((void (**)(void))ops)[i])
 			static_branch_enable_cpuslocked(&scx_has_op[i]);
@@ -4796,6 +4836,9 @@ static int bpf_scx_init_member(const struct btf_type *t,
 		ops->exit_dump_len = *(u32 *)(udata + moff) ?: SCX_EXIT_DUMP_DFL_LEN;
 		return 1;
+	case offsetof(struct sched_ext_ops, hotplug_seq):
+		ops->hotplug_seq = *(u64 *)(udata + moff);
+		return 1;
 	}
 
 	return 0;
@@ -6068,7 +6111,7 @@ static int __init scx_init(void)
 	scx_kset = kset_create_and_add("sched_ext", &scx_uevent_ops, kernel_kobj);
 	if (!scx_kset) {
-		pr_err("sched_ext: Failed to create /sys/sched_ext\n");
+		pr_err("sched_ext: Failed to create /sys/kernel/sched_ext\n");
 		return -ENOMEM;
 	}
 
diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h
index 77ec97a0b468b..e2608ad6ec5cf 100644
--- a/tools/sched_ext/include/scx/compat.h
+++ b/tools/sched_ext/include/scx/compat.h
@@ -8,6 +8,9 @@
 #define __SCX_COMPAT_H
 
 #include <bpf/btf.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
 
 struct btf *__COMPAT_vmlinux_btf __attribute__((weak));
 
@@ -106,16 +109,55 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
 #define __COMPAT_SCX_OPS_SWITCH_PARTIAL \
 	__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
 
+static inline long scx_hotplug_seq(void)
+{
+	int fd;
+	char buf[32];
+	ssize_t len;
+	long val;
+
+	fd = open("/sys/kernel/sched_ext/hotplug_seq", O_RDONLY);
+	if (fd < 0)
+		return -ENOENT;
+
+	len = read(fd, buf, sizeof(buf) - 1);
+	SCX_BUG_ON(len <= 0, "read failed (%ld)", len);
+	buf[len] = 0;
+	close(fd);
+
+	val = strtoul(buf, NULL, 10);
+	SCX_BUG_ON(val < 0, "invalid num hotplug events: %ld", val);
+
+	return val;
+}
+
 /*
  * struct sched_ext_ops can change over time. If compat.bpf.h::SCX_OPS_DEFINE()
  * is used to define ops and compat.h::SCX_OPS_LOAD/ATTACH() are used to load
  * and attach it, backward compatibility is automatically maintained where
  * reasonable.
  *
- * - sched_ext_ops.exit_dump_len was added later. On kernels which don't support
- *   it, the value is ignored and a warning is triggered if the value is
- *   requested to be non-zero.
+ * The following values were added in newer kernels:
+ *
+ * - sched_ext_ops.exit_dump_len
+ *   o If nonzero and running on an older kernel, the value is set to zero
+ *     and a warning is emitted
+ *
+ * - sched_ext_ops.hotplug_seq
+ *   o If nonzero and running on an older kernel, the scheduler will fail to
+ *     load
  */
+#define SCX_OPS_OPEN(__ops_name, __scx_name) ({					\
+	struct __scx_name *__skel;						\
+										\
+	__skel = __scx_name##__open();						\
+	SCX_BUG_ON(!__skel, "Could not open " #__scx_name);			\
+										\
+	if (__COMPAT_struct_has_field("sched_ext_ops", "hotplug_seq"))		\
+		__skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq();	\
+	__skel;									\
+})
+
 #define SCX_OPS_LOAD(__skel, __ops_name, __scx_name, __uei_name) ({			\
 	UEI_SET_SIZE(__skel, __ops_name, __uei_name);					\
 	if (__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") &&		\
diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c
index 4b528504702ca..ce23d797a6bb1 100644
--- a/tools/testing/selftests/sched_ext/hotplug.c
+++ b/tools/testing/selftests/sched_ext/hotplug.c
@@ -15,8 +15,6 @@
 #include "scx_test.h"
 #include "util.h"
 
-struct hotplug *skel;
-
 const char *online_path = "/sys/devices/system/cpu/cpu1/online";
 
 static bool is_cpu_online(void)
@@ -40,22 +38,20 @@ static enum scx_test_status setup(void **ctx)
 	if (!is_cpu_online())
 		return SCX_TEST_SKIP;
 
-	skel = hotplug__open_and_load();
-	if (!skel) {
-		SCX_ERR("Failed to open and load hotplug skel");
-		return SCX_TEST_FAIL;
-	}
-
 	return SCX_TEST_PASS;
 }
 
 static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined)
 {
+	struct hotplug *skel;
 	struct bpf_link *link;
 	long kind, code;
 
 	SCX_ASSERT(is_cpu_online());
 
+	skel = hotplug__open_and_load();
+	SCX_ASSERT(skel);
+
 	/* Testing the offline -> online path, so go offline before starting */
 	if (onlining)
 		toggle_online_status(0);
@@ -78,6 +74,7 @@ static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined)
 
 	if (!link) {
 		SCX_ERR("Failed to attach scheduler");
+		hotplug__destroy(skel);
 		return SCX_TEST_FAIL;
 	}
 
@@ -93,12 +90,51 @@ static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined)
 		toggle_online_status(1);
 
 	bpf_link__destroy(link);
-
-	UEI_RESET(skel, uei);
+	hotplug__destroy(skel);
 
 	return SCX_TEST_PASS;
 }
 
+static enum scx_test_status test_hotplug_attach(void)
+{
+	struct hotplug *skel;
+	struct bpf_link *link;
+	enum scx_test_status status = SCX_TEST_PASS;
+	long kind, code;
+
+	SCX_ASSERT(is_cpu_online());
+	SCX_ASSERT(scx_hotplug_seq() > 0);
+
+	skel = SCX_OPS_OPEN(hotplug_nocb_ops, hotplug);
+	SCX_ASSERT(skel);
+
+	SCX_OPS_LOAD(skel, hotplug_nocb_ops, hotplug, uei);
+
+	/*
+	 * Take the CPU offline to increment the global hotplug seq. Because
+	 * we set ops->hotplug_seq above, the kernel should then eject the
+	 * scheduler with SCX_EXIT_UNREG_KERN on the enable path.
+	 */
+	toggle_online_status(0);
+	link = bpf_map__attach_struct_ops(skel->maps.hotplug_nocb_ops);
+
+	toggle_online_status(1);
+
+	SCX_ASSERT(link);
+	while (!UEI_EXITED(skel, uei))
+		sched_yield();
+
+	kind = SCX_KIND_VAL(SCX_EXIT_UNREG_KERN);
+	code = SCX_ECODE_VAL(SCX_ECODE_ACT_RESTART) |
+	       SCX_ECODE_VAL(SCX_ECODE_RSN_HOTPLUG);
+	SCX_EQ(UEI_KIND(skel, uei), kind);
+	SCX_EQ(UEI_ECODE(skel, uei), code);
+
+	bpf_link__destroy(link);
+	hotplug__destroy(skel);
+
+	return status;
+}
+
 static enum scx_test_status run(void *ctx)
 {
@@ -114,12 +150,11 @@ static enum scx_test_status run(void *ctx)
 
 #undef HP_TEST
 
-	return SCX_TEST_PASS;
+	return test_hotplug_attach();
 }
 
 static void cleanup(void *ctx)
 {
-	hotplug__destroy(skel);
 	toggle_online_status(1);
 }
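
For reference, a scheduler binary would consume the new SCX_OPS_OPEN() macro roughly as sketched below. This is an illustration, not part of the patch: scx_simple and simple_ops are hypothetical skeleton/ops-map names standing in for a real scheduler, while bpf_map__attach_struct_ops() is the same libbpf call the selftest above uses.

	struct scx_simple *skel;
	struct bpf_link *link;

	/*
	 * SCX_OPS_OPEN() opens the skeleton and, when the running kernel's
	 * struct sched_ext_ops has the hotplug_seq field, snapshots the
	 * current /sys/kernel/sched_ext/hotplug_seq value into
	 * ops->hotplug_seq.
	 */
	skel = SCX_OPS_OPEN(simple_ops, scx_simple);
	SCX_OPS_LOAD(skel, simple_ops, scx_simple, uei);

	/*
	 * If a hotplug event fired between the snapshot and the enable path,
	 * the kernel ejects the scheduler with SCX_EXIT_UNREG_KERN and an
	 * exit code of SCX_ECODE_ACT_RESTART | SCX_ECODE_RSN_HOTPLUG.
	 */
	link = bpf_map__attach_struct_ops(skel->maps.simple_ops);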
diff --git a/tools/testing/selftests/sched_ext/util.h b/tools/testing/selftests/sched_ext/util.h
index 4cc65df1f9204..bc13dfec1267a 100644
--- a/tools/testing/selftests/sched_ext/util.h
+++ b/tools/testing/selftests/sched_ext/util.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
- * Copyright (c) 2024 Tejun Heo
+ * Copyright (c) 2024 David Vernet
  */
 
 #ifndef __SCX_TEST_UTIL_H__
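
The SCX_ECODE_ACT_RESTART action the kernel now reports suggests one loader-side usage pattern; the sketch below is an assumption about intended use, not anything mandated by the patch. It reuses the hypothetical scx_simple/simple_ops names from the previous sketch, the UEI_EXITED()/UEI_ECODE() helpers as used in the selftest above, and assumes the SCX_ECODE_* enum values are visible to userspace (e.g. via vmlinux.h). On a hotplug-triggered ejection, re-opening the skeleton re-reads hotplug_seq, so a retry observes the post-hotplug sequence number:

	struct scx_simple *skel;
	struct bpf_link *link;
	long ecode;

	do {
		/* Re-open so SCX_OPS_OPEN() snapshots the current seq. */
		skel = SCX_OPS_OPEN(simple_ops, scx_simple);
		SCX_OPS_LOAD(skel, simple_ops, scx_simple, uei);
		link = bpf_map__attach_struct_ops(skel->maps.simple_ops);

		/* Wait for the scheduler to be unloaded. */
		while (!UEI_EXITED(skel, uei))
			sched_yield();

		ecode = UEI_ECODE(skel, uei);
		bpf_link__destroy(link);
		scx_simple__destroy(skel);
	} while (ecode & SCX_ECODE_ACT_RESTART);	/* retry on hotplug race */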