From 7116c1f4fed83834c78d02ae031c3f07e64d34e3 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 22 Jun 2024 20:35:02 +0900 Subject: [PATCH 1/6] io_uring: Add support FLUSH command Add support for --fsync and --fdatasync in the io_uring_cmd ioengine to enable FLUSH commands, just like the libaio or io_uring ioengines. If --fsync or --fdatasync is set to N, a FLUSH command will be issued after every N write commands. Signed-off-by: Minwoo Im --- engines/nvme.c | 5 +++++ engines/nvme.h | 1 + 2 files changed, 6 insertions(+) diff --git a/engines/nvme.c b/engines/nvme.c index 72934c8b1d..33d8747737 100644 --- a/engines/nvme.c +++ b/engines/nvme.c @@ -381,6 +381,11 @@ int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u, case DDIR_TRIM: fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm); return 0; + case DDIR_SYNC: + case DDIR_DATASYNC: + cmd->opcode = nvme_cmd_flush; + cmd->nsid = data->nsid; + return 0; default: return -ENOTSUP; } diff --git a/engines/nvme.h b/engines/nvme.h index bc2370b8d2..b5fef2fb2c 100644 --- a/engines/nvme.h +++ b/engines/nvme.h @@ -73,6 +73,7 @@ enum nvme_admin_opcode { }; enum nvme_io_opcode { + nvme_cmd_flush = 0x00, nvme_cmd_write = 0x01, nvme_cmd_read = 0x02, nvme_cmd_write_uncor = 0x04, From 87b100270bb8f77c662bc02e2ce85e12893170a4 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 10 Jul 2024 07:45:04 +0900 Subject: [PATCH 2/6] td: Rename last_ddir to last_ddir_completed `last_ddir` represents the data direction of the latest completed command. To avoid confusion, this patch renames `last_ddir` to `last_ddir_completed` to make that clearer. 
Signed-off-by: Minwoo Im --- fio.h | 2 +- io_u.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fio.h b/fio.h index 7d9927a006..c5b4df2106 100644 --- a/fio.h +++ b/fio.h @@ -259,7 +259,7 @@ struct thread_data { volatile int runstate; volatile bool terminate; bool last_was_sync; - enum fio_ddir last_ddir; + enum fio_ddir last_ddir_completed; int mmapfd; diff --git a/io_u.c b/io_u.c index a090e12122..b5c03db746 100644 --- a/io_u.c +++ b/io_u.c @@ -1757,7 +1757,7 @@ static bool check_get_trim(struct thread_data *td, struct io_u *io_u) if (get_next_trim(td, io_u)) return true; } else if (!(td->io_hist_len % td->o.trim_backlog) && - td->last_ddir != DDIR_READ) { + td->last_ddir_completed != DDIR_READ) { td->trim_batch = td->o.trim_batch; if (!td->trim_batch) td->trim_batch = td->o.trim_backlog; @@ -1779,7 +1779,7 @@ static bool check_get_verify(struct thread_data *td, struct io_u *io_u) if (td->verify_batch) get_verify = 1; else if (!(td->io_hist_len % td->o.verify_backlog) && - td->last_ddir != DDIR_READ) { + td->last_ddir_completed != DDIR_READ) { td->verify_batch = td->o.verify_batch; if (!td->verify_batch) td->verify_batch = td->o.verify_backlog; @@ -2122,7 +2122,7 @@ static void io_completed(struct thread_data *td, struct io_u **io_u_ptr, return; } - td->last_ddir = ddir; + td->last_ddir_completed = ddir; if (!io_u->error && ddir_rw(ddir)) { unsigned long long bytes = io_u->xfer_buflen - io_u->resid; From e11046bfb64bb82ac928013fe41cd022f6d529db Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 10 Jul 2024 07:48:56 +0900 Subject: [PATCH 3/6] td: Replace last_was_sync with last_ddir_issued `last_was_sync` has represented that the last command had DDIR_SYNC. This can be replaced with `ddir_sync(last_ddir_issued)` and it's much more flexible to represent the last issued command's data direction. 
Signed-off-by: Minwoo Im --- fio.h | 5 +++-- ioengines.c | 4 ++-- libfio.c | 1 - 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fio.h b/fio.h index c5b4df2106..4bb6cfa7f3 100644 --- a/fio.h +++ b/fio.h @@ -258,8 +258,9 @@ struct thread_data { size_t orig_buffer_size; volatile int runstate; volatile bool terminate; - bool last_was_sync; + enum fio_ddir last_ddir_completed; + enum fio_ddir last_ddir_issued; int mmapfd; @@ -629,7 +630,7 @@ static inline bool multi_range_trim(struct thread_data *td, struct io_u *io_u) static inline bool should_fsync(struct thread_data *td) { - if (td->last_was_sync) + if (ddir_sync(td->last_ddir_issued)) return false; if (td_write(td) || td->o.override_sync) return true; diff --git a/ioengines.c b/ioengines.c index 6b81dc772a..dcd4164d4e 100644 --- a/ioengines.c +++ b/ioengines.c @@ -437,7 +437,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u) td->ts.total_io_u[io_u->ddir]++; } - td->last_was_sync = ddir_sync(io_u->ddir); + td->last_ddir_issued = ddir; } else if (ret == FIO_Q_QUEUED) { td->io_u_queued++; @@ -448,7 +448,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u) if (td->io_u_queued >= td->o.iodepth_batch) td_io_commit(td); - td->last_was_sync = ddir_sync(io_u->ddir); + td->last_ddir_issued = ddir; } if (!td_ioengine_flagged(td, FIO_SYNCIO) && diff --git a/libfio.c b/libfio.c index d0c6bf8f56..2596ae5a98 100644 --- a/libfio.c +++ b/libfio.c @@ -101,7 +101,6 @@ static void reset_io_counters(struct thread_data *td, int all) td->zone_bytes = 0; - td->last_was_sync = false; td->rwmix_issues = 0; /* From 704a8cfef1d0a61c8935a5c6047fba148a766dab Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 10 Jul 2024 07:50:14 +0900 Subject: [PATCH 4/6] io_u: Ensure fsync only after write(s) When using `--rw=write --fsync=N`, the FLUSH command is correctly issued after N WRITE commands. However, if READ commands are mixed in with --rw, fsync occurs after READ commands as well. 
This patch ensures that fsync is only triggered after the specified number of WRITE commands, regardless of READ commands. Signed-off-by: Minwoo Im --- io_u.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_u.c b/io_u.c index b5c03db746..f8ce8618cb 100644 --- a/io_u.c +++ b/io_u.c @@ -755,7 +755,7 @@ static enum fio_ddir get_rw_ddir(struct thread_data *td) * See if it's time to fsync/fdatasync/sync_file_range first, * and if not then move on to check regular I/Os. */ - if (should_fsync(td)) { + if (should_fsync(td) && td->last_ddir_issued == DDIR_WRITE) { if (td->o.fsync_blocks && td->io_issues[DDIR_WRITE] && !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks)) return DDIR_SYNC; From cf168c5be90cc7c74114d61d2c01fa39a3cb52c7 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 23 Jun 2024 00:30:03 +0900 Subject: [PATCH 5/6] io_u: Support fsync for --rw=trimwrite Even after ddir has been determined in get_rw_ddir(), it may still be overwritten in set_rw_ddir(). If td is a trimwrite workload, ddir is updated to either DDIR_TRIM or DDIR_WRITE even when it is already DDIR_SYNC. To support DDIR_SYNC (fsync) for trimwrite, this patch checks ddir_sync() in the trimwrite case so that the pre-determined ddir is not overwritten. 
Signed-off-by: Minwoo Im --- io_u.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_u.c b/io_u.c index f8ce8618cb..f81086b658 100644 --- a/io_u.c +++ b/io_u.c @@ -815,7 +815,7 @@ static void set_rw_ddir(struct thread_data *td, struct io_u *io_u) if (td->o.zone_mode == ZONE_MODE_ZBD) ddir = zbd_adjust_ddir(td, io_u, ddir); - if (td_trimwrite(td)) { + if (td_trimwrite(td) && !ddir_sync(ddir)) { struct fio_file *f = io_u->file; if (f->last_start[DDIR_WRITE] == f->last_start[DDIR_TRIM]) ddir = DDIR_TRIM; From e84adcf597ed52d94c3e48430915605b85ab1a3a Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 4 Jul 2024 23:09:12 +0900 Subject: [PATCH 6/6] t/nvmept.py: Add test cases for FLUSH This test script checks the number of FLUSH commands triggered by the --fsync= option, to make sure that FLUSH commands are issued only after WRITE commands across the various --rw I/O workloads. Signed-off-by: Minwoo Im --- t/nvmept.py | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/t/nvmept.py b/t/nvmept.py index 1ade64dc57..3d90f4bfcd 100755 --- a/t/nvmept.py +++ b/t/nvmept.py @@ -87,6 +87,53 @@ def check_result(self): self.passed = False +class FlushTest(FioJobCmdTest): + def setup(self, parameters): + fio_args = [ + "--name=nvmept-flush", + "--ioengine=io_uring_cmd", + "--cmd_type=nvme", + "--randrepeat=0", + f"--filename={self.fio_opts['filename']}", + f"--rw={self.fio_opts['rw']}", + f"--output={self.filenames['output']}", + f"--output-format={self.fio_opts['output-format']}", + ] + + for opt in ['fixedbufs', 'nonvectored', 'force_async', 'registerfiles', + 'sqthread_poll', 'sqthread_poll_cpu', 'hipri', 'nowait', + 'time_based', 'runtime', 'verify', 'io_size', 'num_range', + 'iodepth', 'iodepth_batch', 'iodepth_batch_complete', + 'size', 'rate', 'bs', 'bssplit', 'bsrange', 'randrepeat', + 'buffer_pattern', 'verify_pattern', 'offset', 'fdp', + 'fdp_pli', 'fdp_pli_select', 'dataplacement', 'plid_select', + 'plids', 'dp_scheme', 
'number_ios', 'read_iolog', 'fsync']: + if opt in self.fio_opts: + option = f"--{opt}={self.fio_opts[opt]}" + fio_args.append(option) + + super().setup(fio_args) + + def check_result(self): + super().check_result() + + job = self.json_data['jobs'][0] + + rw = self.fio_opts['rw'] + fsync = self.fio_opts['fsync'] + + nr_write = job['write']['total_ios'] + nr_sync = job['sync']['total_ios'] + + nr_sync_exp = nr_write // fsync + + # The actual number of DDIR_SYNC issued might miss one DDIR_SYNC command + # when the last command issued was DDIR_WRITE command. + if not ((nr_sync == nr_sync_exp) or (nr_sync + 1 == nr_sync_exp)): + logging.error(f"nr_write={nr_write}, nr_sync={nr_sync}, fsync={fsync}") + self.passed = False + + TEST_LIST = [ { "test_id": 1, @@ -255,6 +302,50 @@ def check_result(self): }, "test_class": PassThruTest, }, + { + "test_id": 16, + "fio_opts": { + "rw": 'read', + "bs": 4096, + "number_ios": 10, + "fsync": 1, + "output-format": "json", + }, + "test_class": FlushTest, + }, + { + "test_id": 17, + "fio_opts": { + "rw": 'write', + "bs": 4096, + "number_ios": 10, + "fsync": 1, + "output-format": "json", + }, + "test_class": FlushTest, + }, + { + "test_id": 18, + "fio_opts": { + "rw": 'readwrite', + "bs": 4096, + "number_ios": 10, + "fsync": 1, + "output-format": "json", + }, + "test_class": FlushTest, + }, + { + "test_id": 19, + "fio_opts": { + "rw": 'trimwrite', + "bs": 4096, + "number_ios": 10, + "fsync": 1, + "output-format": "json", + }, + "test_class": FlushTest, + }, ] def parse_args():