From cc5c5319bfb668c31365a98f2b79b7b0a055cb47 Mon Sep 17 00:00:00 2001 From: Stephen Gregoratto Date: Mon, 20 Nov 2023 14:01:20 +1100 Subject: [PATCH] Linux: Add cachestat, fchmodat2 syscalls (#958) --- libc/calls/cachestat.c | 47 ++++++++++++ libc/calls/cachestat.h | 34 +++++++++ libc/calls/fchmodat.c | 12 ++- libc/sysv/calls/sys_cachestat.S | 2 + libc/sysv/calls/sys_fchmodat2.S | 2 + libc/sysv/syscalls.sh | 2 + test/libc/calls/cachestat_test.c | 121 +++++++++++++++++++++++++++++++ test/libc/calls/fchmodat_test.c | 68 +++++++++++++++++ 8 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 libc/calls/cachestat.c create mode 100644 libc/calls/cachestat.h create mode 100644 libc/sysv/calls/sys_cachestat.S create mode 100644 libc/sysv/calls/sys_fchmodat2.S create mode 100644 test/libc/calls/cachestat_test.c create mode 100644 test/libc/calls/fchmodat_test.c diff --git a/libc/calls/cachestat.c b/libc/calls/cachestat.c new file mode 100644 index 00000000000..b24215b02f4 --- /dev/null +++ b/libc/calls/cachestat.c @@ -0,0 +1,47 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ + +#include "libc/calls/cachestat.h" +#include "libc/intrin/strace.internal.h" + +int sys_cachestat(int, struct cachestat_range *, struct cachestat *, uint32_t); + +/** + * Query the page cache statistics of a file. + * + * @param fd The open file descriptor to retrieve statistics from. + * @param cstat_range The byte range in `fd` to query. When `len > 0`, the range + * is `[off..off + len]`. When `len` == 0, the range is from `off` to the end of + * `fd`. + * @param cstat The structure where page cache statistics are stored. + * @param flags Currently unused, and must be set to `0`. + * @return 0 on success, or -1 w/ errno. 
+ * @raise EFAULT if `cstat_range` or `cstat` points to invalid memory + * @raise EINVAL if `flags` is nonzero + * @raise EBADF if `fd` is negative or not open + * @raise EOPNOTSUPP if `fd` refers to a hugetlbfs file + * @raise ENOSYS if not Linux 6.5+ + */ +int cachestat(int fd, struct cachestat_range *cstat_range, + struct cachestat *cstat, uint32_t flags) { + int rc; + rc = sys_cachestat(fd, cstat_range, cstat, flags); + STRACE("cachestat(%d, %p, %p, %#x) → %d% m", fd, cstat_range, cstat, flags, rc); + return rc; +} \ No newline at end of file diff --git a/libc/calls/cachestat.h b/libc/calls/cachestat.h new file mode 100644 index 00000000000..1c3cd22f35b --- /dev/null +++ b/libc/calls/cachestat.h @@ -0,0 +1,34 @@ +#ifndef COSMOPOLITAN_LIBC_CALLS_CACHESTAT_H_ +#define COSMOPOLITAN_LIBC_CALLS_CACHESTAT_H_ + +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +struct cachestat_range { + uint64_t off; + uint64_t len; +}; + +struct cachestat { + /** Number of cached pages. */ + uint64_t nr_cache; + /** Number of dirty pages. */ + uint64_t nr_dirty; + /** Number of pages marked for writeback. */ + uint64_t nr_writeback; + /** Number of pages evicted from the cache. */ + uint64_t nr_evicted; + /** + * Number of recently evicted pages. + * A page is recently evicted if its last eviction was recent enough that its + * reentry to the cache would indicate that it is actively being used by the + * system, and that there is memory pressure on the system. 
+ */ + uint64_t nr_recently_evicted; +}; + +int cachestat(int, struct cachestat_range *, struct cachestat *, uint32_t); + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_CALLS_CACHESTAT_H_ */ \ No newline at end of file diff --git a/libc/calls/fchmodat.c b/libc/calls/fchmodat.c index 8e35e5c3718..c1d56792bf1 100644 --- a/libc/calls/fchmodat.c +++ b/libc/calls/fchmodat.c @@ -20,6 +20,7 @@ #include "libc/calls/syscall-nt.internal.h" #include "libc/calls/syscall-sysv.internal.h" #include "libc/dce.h" +#include "libc/errno.h" #include "libc/intrin/asan.internal.h" #include "libc/intrin/describeflags.internal.h" #include "libc/intrin/strace.internal.h" @@ -28,6 +29,7 @@ #include "libc/sysv/errfuns.h" int sys_fchmodat_linux(int, const char *, unsigned, int); +int sys_fchmodat2(int, const char *, unsigned, int); /** * Changes permissions on file, e.g.: @@ -40,6 +42,9 @@ int sys_fchmodat_linux(int, const char *, unsigned, int); * @param mode contains octal flags (base 8) * @param flags can have `AT_SYMLINK_NOFOLLOW` * @raise EROFS if `dirfd` or `path` use zip file system + * @raise EOPNOTSUPP on Linux if `path` is a symbolic link, `AT_SYMLINK_NOFOLLOW` + * is set in `flags`, and the filesystem does not support setting the mode of + * symbolic links. 
* @errors ENOENT, ENOTDIR, ENOSYS * @asyncsignalsafe * @see fchmod() @@ -53,7 +58,12 @@ int fchmodat(int dirfd, const char *path, uint32_t mode, int flags) { rc = erofs(); } else if (!IsWindows()) { if (IsLinux() && flags) { - rc = sys_fchmodat_linux(dirfd, path, mode, flags); + int serrno = errno; + rc = sys_fchmodat2(dirfd, path, mode, flags); + if (rc == -1 && errno == ENOSYS) { + errno = serrno; + rc = sys_fchmodat_linux(dirfd, path, mode, flags); + } } else { rc = sys_fchmodat(dirfd, path, mode, flags); } diff --git a/libc/sysv/calls/sys_cachestat.S b/libc/sysv/calls/sys_cachestat.S new file mode 100644 index 00000000000..44475df40d7 --- /dev/null +++ b/libc/sysv/calls/sys_cachestat.S @@ -0,0 +1,2 @@ +#include "libc/sysv/macros.internal.h" +.scall sys_cachestat,0xfffffffffffff1c3,451,4095,globl diff --git a/libc/sysv/calls/sys_fchmodat2.S b/libc/sysv/calls/sys_fchmodat2.S new file mode 100644 index 00000000000..a45ef72d814 --- /dev/null +++ b/libc/sysv/calls/sys_fchmodat2.S @@ -0,0 +1,2 @@ +#include "libc/sysv/macros.internal.h" +.scall sys_fchmodat2,0xfffffffffffff1c4,452,4095,globl diff --git a/libc/sysv/syscalls.sh b/libc/sysv/syscalls.sh index c8dedf7e7ec..8fcd359a869 100755 --- a/libc/sysv/syscalls.sh +++ b/libc/sysv/syscalls.sh @@ -383,6 +383,8 @@ scall sys_memfd_secret 0xfffffffffffff1bf 0xfff globl # no wrapper scall sys_process_mrelease 0xfffffffffffff1c0 0xfff globl # no wrapper scall sys_futex_waitv 0xfffffffffffff1c1 0xfff globl # no wrapper scall sys_set_mempolicy_home_node 0xfffffffffffff1c2 0xfff globl # no wrapper +scall sys_cachestat 0xfffffffffffff1c3 0x1c3 globl # Linux 6.5+ +scall sys_fchmodat2 0xfffffffffffff1c4 0x1c4 globl # no wrapper Linux 6.6+ # The Fifth Bell System Interface, Community Edition # » besiyata dishmaya diff --git a/test/libc/calls/cachestat_test.c b/test/libc/calls/cachestat_test.c new file mode 100644 index 00000000000..ab17e046858 --- /dev/null +++ b/test/libc/calls/cachestat_test.c @@ -0,0 +1,121 @@ +/*-*- 
mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2023 Nhat Pham │ +│ Copyright 2023 Stephen Gregoratto │ +│ │ +│ This program is free software; you can redistribute it and/or modify │ +│ it under the terms of the GNU General Public License as published by │ +│ the Free Software Foundation; version 2 of the License. │ +│ │ +│ This program is distributed in the hope that it will be useful, but │ +│ WITHOUT ANY WARRANTY; without even the implied warranty of │ +│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │ +│ General Public License for more details. │ +│ │ +│ You should have received a copy of the GNU General Public License │ +│ along with this program; if not, write to the Free Software │ +│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │ +│ 02110-1301 USA │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/cachestat.h" +#include "libc/calls/calls.h" +#include "libc/calls/struct/statfs.h" +#include "libc/dce.h" +#include "libc/errno.h" +#include "libc/intrin/kprintf.h" +#include "libc/macros.internal.h" +#include "libc/mem/gc.internal.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/sysconf.h" +#include "libc/stdio/rand.h" +#include "libc/sysv/consts/auxv.h" +#include "libc/sysv/consts/o.h" +#include "libc/testlib/testlib.h" +#include "libc/x/x.h" + +static size_t pagesize; + +bool HasCachestatSupport(void) { + return IsLinux() && cachestat(-1, 0, 0, 0) == -1 && errno == EBADF; +} + +void SetUpOnce(void) { + if (!HasCachestatSupport()) { + kprintf("warning: cachestat not supported on this system: %m\n"); + exit(0); + } + testlib_enable_tmp_setup_teardown(); + pagesize = (size_t)getauxval(AT_PAGESZ); + // ASSERT_SYS(0, 0, pledge("stdio rpath wpath cpath", 0)); +} + +TEST(cachestat, 
testCachestatOnDevices) { + const char *const files[] = { + "/dev/zero", "/dev/null", "/dev/urandom", "/proc/version", "/proc", + }; + struct cachestat_range range = {0, 4 * pagesize}; + struct cachestat cs; + for (size_t i = 0; i < ARRAYLEN(files); i++) { + ASSERT_SYS(0, 3, open(files[i], O_RDONLY)); + ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0)); + ASSERT_SYS(0, 0, close(3)); + } +} + +TEST(cachestat, testCachestatAfterWrite) { + size_t size = 4 * pagesize; + char *data = gc(xmalloc(size)); + ASSERT_SYS(0, size, getrandom(data, size, 0)); + // TODO: handle EINTR like xbarf + ASSERT_SYS(0, 3, open("tmpfilecachestat", O_CREAT | O_RDWR, 0600)); + ASSERT_SYS(0, size, write(3, data, size)); + struct cachestat_range range = {0, size}; + struct cachestat cs; + ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0)); + ASSERT_EQ(4, cs.nr_cache + cs.nr_evicted, + "total number of evicted pages is off."); + ASSERT_SYS(0, 0, close(3)); +} + +#define TMPFS_MAGIC 0x01021994 +TEST(cachestat, testCachestatSyncNoDirty) { + size_t size = 4 * pagesize; + char *data = gc(xmalloc(size)); + ASSERT_SYS(0, size, getrandom(data, size, 0)); + // TODO: handle EINTR like xbarf + ASSERT_SYS(0, 3, open("tmpfilecachestat", O_CREAT | O_RDWR, 0600)); + ASSERT_SYS(0, size, write(3, data, size)); + struct cachestat_range range = {0, size}; + struct cachestat cs; + ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0)); + ASSERT_EQ(4, cs.nr_cache + cs.nr_evicted, + "total number of evicted pages is off."); + struct statfs statfs; + ASSERT_SYS(0, 0, fstatfs(3, &statfs)); + if (statfs.f_type == TMPFS_MAGIC) goto done; + ASSERT_SYS(0, 0, fsync(3)); + ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0)); + EXPECT_EQ(0, cs.nr_dirty, + "dirty pages should be zero after fsync, got %llu\n", cs.nr_dirty); +done: + ASSERT_SYS(0, 0, close(3)); +} + +TEST(cachestat, testCachestatShmem) { + size_t filesize = 512 * 2 * pagesize; // 2 2MB huge pages. 
+ size_t compute_len = 512 * pagesize; + unsigned long num_pages = compute_len / pagesize; + char *data = gc(xmalloc(filesize)); + ASSERT_SYS(0, filesize, getrandom(data, filesize, 0)); + ASSERT_SYS(0, 3, shm_open("tmpshmcstat", O_CREAT | O_RDWR, 0600)); + ASSERT_SYS(0, 0, ftruncate(3, filesize)); + ASSERT_SYS(0, filesize, write(3, data, filesize)); + struct cachestat_range range = {pagesize, compute_len}; + struct cachestat cs; + ASSERT_SYS(0, 0, cachestat(3, &range, &cs, 0)); + ASSERT_EQ(num_pages, cs.nr_cache + cs.nr_evicted, + "total number of cached and evicted pages is off.\n"); + ASSERT_SYS(0, 0, shm_unlink("tmpshmcstat")); + ASSERT_SYS(0, 0, close(3)); +} diff --git a/test/libc/calls/fchmodat_test.c b/test/libc/calls/fchmodat_test.c new file mode 100644 index 00000000000..3e8736717e4 --- /dev/null +++ b/test/libc/calls/fchmodat_test.c @@ -0,0 +1,68 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Stephen Gregoratto │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ + +#include "libc/calls/calls.h" +#include "libc/dce.h" +#include "libc/errno.h" +#include "libc/macros.internal.h" +#include "libc/sysv/consts/s.h" +// #include "libc/mem/gc.internal.h" +#include "libc/calls/struct/stat.h" +#include "libc/runtime/runtime.h" +#include "libc/sysv/consts/at.h" +#include "libc/sysv/consts/o.h" +#include "libc/testlib/testlib.h" +#include "libc/x/x.h" + +void SetUpOnce(void) { + testlib_enable_tmp_setup_teardown(); + // ASSERT_SYS(0, 0, pledge("stdio rpath wpath cpath", 0)); +} + +static void ExpectMode(const char *filename, uint32_t mode) { + struct stat st; + ASSERT_SYS(0, 0, fstatat(AT_FDCWD, filename, &st, AT_SYMLINK_NOFOLLOW)); + ASSERT_TRUE((st.st_mode & 0777) == mode); +} + +TEST(fchmodat, testFchmodat) { + ASSERT_SYS(0, 3, + open("regfile", O_WRONLY | O_CREAT | O_EXCL | O_TRUNC, 0644)); + ASSERT_SYS(0, 0, close(3)); + ASSERT_SYS(0, 0, symlink("regfile", "symlink")); + ExpectMode("regfile", 0644); + struct stat st; + ASSERT_SYS(0, 0, fstatat(AT_FDCWD, "symlink", &st, AT_SYMLINK_NOFOLLOW)); + uint32_t sym_mode = st.st_mode & 0777; + ASSERT_SYS(0, 0, fchmodat(AT_FDCWD, "regfile", 0640, 0)); + ExpectMode("regfile", 0640); + ASSERT_SYS(0, 0, fchmodat(AT_FDCWD, "regfile", 0600, AT_SYMLINK_NOFOLLOW)); + ExpectMode("regfile", 0600); + ASSERT_SYS(0, 0, fchmodat(AT_FDCWD, "symlink", 0640, 0)); + ExpectMode("regfile", 0640); + ExpectMode("symlink", sym_mode); + int rc = fchmodat(AT_FDCWD, "symlink", 0600, AT_SYMLINK_NOFOLLOW); + if (rc == -1) { + ASSERT_TRUE(errno == ENOTSUP); + errno = 0; + } else { + ExpectMode("symlink", 0600); + } + ExpectMode("regfile", 0640); +}