diff --git a/kmod/src/data.c b/kmod/src/data.c index 2c0822dbb..7d8d6a8a3 100644 --- a/kmod/src/data.c +++ b/kmod/src/data.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "format.h" #include "super.h" @@ -471,6 +472,7 @@ static int alloc_block(struct super_block *sb, struct inode *inode, ext->map = blkno; ext->flags = 0; ret = 0; + out: if (ret < 0 && blkno > 0) { err = scoutfs_free_data(sb, datinf->alloc, datinf->wri, @@ -488,8 +490,68 @@ static int alloc_block(struct super_block *sb, struct inode *inode, return ret; } +static int alloc_block_dio(struct super_block *sb, struct inode *inode, + struct scoutfs_extent *ext, struct buffer_head *bh, + u64 iblock, struct scoutfs_lock *lock) +{ + DECLARE_DATA_INFO(sb, datinf); + const u64 ino = scoutfs_ino(inode); + struct data_ext_args args = { + .ino = ino, + .inode = inode, + .lock = lock, + }; + u64 blkno = 0; + u64 blocks = 0; + u64 count = 0; + u64 last; + u8 ext_fl = 0; + int ret = 0; + bool first = true; + int err; + + last = (bh->b_size - 1) >> SCOUTFS_BLOCK_SM_SHIFT; + + while(blocks < last) { + if (ext->len >= last && first) + count = min_t(u64, last, SCOUTFS_FALLOCATE_ALLOC_LIMIT); + else + count = min_t(u64, last - blocks, SCOUTFS_FALLOCATE_ALLOC_LIMIT); + + mutex_lock(&datinf->mutex); + ret = scoutfs_alloc_data(sb, datinf->alloc, datinf->wri, + &datinf->dalloc, count, &blkno, &count); + if (ret == 0) { + ret = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock, + count, blkno, + ext_fl | SEF_UNWRITTEN); + if (ret < 0) { + err = scoutfs_free_data(sb, datinf->alloc, + datinf->wri, + &datinf->data_freed, + blkno, count); + BUG_ON(err); /* inconsistent */ + } + } + + mutex_unlock(&datinf->mutex); + + if (ret < 0) + break; + + blocks += count; + first = false; + ret = scoutfs_ext_next(sb, &data_ext_ops, &args, + iblock, 1, ext); + if (ret < 0) + break; + } + + return ret; +} + static int scoutfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) + struct buffer_head *bh, int create, bool dio_flag) { struct scoutfs_inode_info *si = SCOUTFS_I(inode); const u64 ino = scoutfs_ino(inode); @@ -530,7 +592,7 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock, } /* convert unwritten to written, could be staging */ - if (create && ext.map && (ext.flags & SEF_UNWRITTEN)) { + if (create && ext.map && !dio_flag && (ext.flags & SEF_UNWRITTEN)) { un.start = iblock; un.len = 1; un.map = ext.map + (iblock - ext.start); @@ -542,11 +604,26 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock, set_buffer_new(bh); } goto out; + } else if (create && ext.map && dio_flag) { + un.start = iblock; + un.len = 1; + un.map = ext.map + (iblock - ext.start); + un.flags = ext.flags; + ret = scoutfs_ext_set(sb, &data_ext_ops, &args, + un.start, un.len, un.map, un.flags); + if (ret == 0) { + ext = un; + set_buffer_new(bh); + } + goto out; } /* allocate and map blocks containing our logical block */ if (create && !ext.map) { - ret = alloc_block(sb, inode, &ext, iblock, lock); + if (dio_flag) + ret = alloc_block_dio(sb, inode, &ext, bh, iblock, lock); + else + ret = alloc_block(sb, inode, &ext, iblock, lock); if (ret == 0) set_buffer_new(bh); } else { @@ -580,22 +657,72 @@ static int scoutfs_get_block_read(struct inode *inode, sector_t iblock, int ret; down_read(&si->extent_sem); - ret = scoutfs_get_block(inode, iblock, bh, create); + ret = scoutfs_get_block(inode, iblock, bh, create, false); up_read(&si->extent_sem); return ret; } -static int scoutfs_get_block_write(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) +int scoutfs_get_block_write(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + struct scoutfs_inode_info *si = SCOUTFS_I(inode); + int ret; + + down_write(&si->extent_sem); + ret = scoutfs_get_block(inode, iblock, bh, create, false); + up_write(&si->extent_sem); + + return ret; +} + + +static int scoutfs_get_block_write_dio(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) { struct scoutfs_inode_info *si = SCOUTFS_I(inode); + struct super_block *sb = inode->i_sb; + struct scoutfs_lock *lock = NULL; + LIST_HEAD(ind_locks); int ret; + lock = scoutfs_per_task_get(&si->pt_data_lock); + if (WARN_ON_ONCE(!lock)) { + return -EINVAL; + } + + if (inode) + ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, + true, false); + else + ret = scoutfs_hold_trans(sb, false); + if (ret) + goto out; + + if (inode) + ret = scoutfs_dirty_inode_item(inode, lock); + + if (ret < 0) + goto out_unlock; + down_write(&si->extent_sem); - ret = scoutfs_get_block(inode, iblock, bh, create); + ret = scoutfs_get_block(inode, iblock, bh, create, true); up_write(&si->extent_sem); + if (inode) { + scoutfs_inode_set_data_seq(inode); + scoutfs_inode_inc_data_version(inode); + inode_inc_iversion(inode); + + if (ret > 0) + i_size_write(inode, ret); + scoutfs_update_inode_item(inode, lock, &ind_locks); + } + +out_unlock: + scoutfs_release_trans(sb); + scoutfs_inode_index_unlock(sb, &ind_locks); +out: return ret; } @@ -861,6 +988,154 @@ static int scoutfs_write_end(struct file *file, struct address_space *mapping, return ret; } +static s64 convert_unwritten_items(struct super_block *sb, struct inode *inode, + u64 ino, u64 iblock, u64 last, + struct scoutfs_lock *lock) +{ + struct data_ext_args args = { + .ino = ino, + .inode = inode, + .lock = lock, + }; + struct scoutfs_extent ext; + struct scoutfs_extent un; + u64 offset; + s64 ret; + int i; + + ret = 0; + + for (i = 0; iblock <= last; i++) { + if (i == EXTENTS_PER_HOLD) { + ret = iblock; + break; + } + + ret = scoutfs_ext_next(sb, &data_ext_ops, &args, + iblock, 1, &ext); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + break; + } + + /* done if we went past the region */ + if (ext.start > last) { + ret = 0; + break; + } + + /* nothing to do when already marked written */ + if (!(ext.flags & SEF_UNWRITTEN)) { + iblock = ext.start + ext.len; + continue; + } + + iblock = max(ext.start, iblock); + offset = iblock - ext.start; + + un.start = iblock; + un.map = ext.map ? ext.map + offset : 0; + un.len = min(ext.len - offset, last - iblock + 1); + un.flags = ext.flags & ~(SEF_OFFLINE|SEF_UNWRITTEN); + + ret = scoutfs_ext_set(sb, &data_ext_ops, &args, + un.start, un.len, un.map, un.flags); + if (ret < 0) + break; + + iblock += un.len; + } + + return ret; +} + +static ssize_t +convert_unwritten_extent(struct inode *inode, loff_t offset, ssize_t count) +{ + struct scoutfs_inode_info *si = SCOUTFS_I(inode); + struct super_block *sb = inode->i_sb; + const u64 ino = scoutfs_ino(inode); + struct scoutfs_lock *lock = NULL; + LIST_HEAD(ind_locks); + u64 iblock; + u64 last; + ssize_t ret = 0; + + lock = scoutfs_per_task_get(&si->pt_data_lock); + if (WARN_ON_ONCE(!lock)) { + ret = -EINVAL; + goto out; + } + + iblock = offset >> SCOUTFS_BLOCK_SM_SHIFT; + last = (offset + count - 1) >> SCOUTFS_BLOCK_SM_SHIFT; + while(iblock <= last) { + if (inode) + ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, + true, false); + else + ret = scoutfs_hold_trans(sb, false); + if (ret) + break; + + if (inode) + ret = scoutfs_dirty_inode_item(inode, lock); + else + ret = 0; + + if (ret == 0) { + down_write(&si->extent_sem); + ret = convert_unwritten_items(sb, inode, ino, iblock, + last, lock); + up_write(&si->extent_sem); + } + + if (ret < 0) + goto out; + + if (inode) { + scoutfs_inode_set_data_seq(inode); + scoutfs_inode_inc_data_version(inode); + inode_inc_iversion(inode); + + if (ret > 0) + i_size_write(inode, ret); + scoutfs_update_inode_item(inode, lock, &ind_locks); + } + scoutfs_release_trans(sb); + if (inode) + scoutfs_inode_index_unlock(sb, &ind_locks); + + if (ret <= 0) + break; + + iblock = ret; + } + +out: + return ret; +} + +static ssize_t +scoutfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + + ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + scoutfs_get_block_write_dio); + if (ret > 0 && (rw & WRITE)) + { + ret = convert_unwritten_extent(inode, offset, ret); + } + + return ret; +} + /* * Try to allocate unwritten extents for any unallocated regions of the * logical block extent from the caller. The caller manages locks and @@ -1473,13 +1748,14 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, iblock = start >> SCOUTFS_BLOCK_SM_SHIFT; last = (start + len - 1) >> SCOUTFS_BLOCK_SM_SHIFT; - while (iblock <= last) { + while (true) { ret = scoutfs_ext_next(sb, &data_ext_ops, &args, iblock, 1, &ext); if (ret < 0) { - if (ret == -ENOENT) + if (ret == -ENOENT) { ret = 0; - last_flags = FIEMAP_EXTENT_LAST; + last_flags = FIEMAP_EXTENT_LAST; + } break; } @@ -1804,6 +2080,7 @@ const struct address_space_operations scoutfs_file_aops = { .writepages = scoutfs_writepages, .write_begin = scoutfs_write_begin, .write_end = scoutfs_write_end, + .direct_IO = scoutfs_direct_IO, }; const struct file_operations scoutfs_file_fops = { diff --git a/kmod/src/data.h b/kmod/src/data.h index c056915e2..2d4a119a9 100644 --- a/kmod/src/data.h +++ b/kmod/src/data.h @@ -49,6 +49,8 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode, int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); +int scoutfs_get_block_write(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create); int scoutfs_data_init_offline_extent(struct inode *inode, u64 size, struct scoutfs_lock *lock); int scoutfs_data_move_blocks(struct inode *from, u64 from_off, diff --git a/kmod/src/file.c b/kmod/src/file.c index 586d77fda..9ee31de81 100644 --- a/kmod/src/file.c +++ b/kmod/src/file.c @@ -47,6 +47,9 @@ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, DECLARE_DATA_WAIT(dw); int ret; + if (!is_sync_kiocb(iocb)) + return -EINVAL; + retry: /* protect checked extents from release */ mutex_lock(&inode->i_mutex); @@ -97,6 +100,9 @@ ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, DECLARE_DATA_WAIT(dw); int ret; + if (!is_sync_kiocb(iocb)) + return -EINVAL; + if (iocb->ki_left == 0) /* Does this even happen? */ return 0; diff --git a/kmod/src/inode.c b/kmod/src/inode.c index 9d22f52a7..7d007688b 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "format.h" #include "super.h" @@ -354,15 +355,22 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock, { struct scoutfs_inode_info *si = SCOUTFS_I(inode); struct super_block *sb = inode->i_sb; + SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent); LIST_HEAD(ind_locks); int ret; if (!S_ISREG(inode->i_mode)) return 0; + scoutfs_per_task_add(&si->pt_data_lock, &pt_ent, lock); + ret = block_truncate_page(inode->i_mapping, new_size, scoutfs_get_block_write); + scoutfs_per_task_del(&si->pt_data_lock, &pt_ent); + if (ret) + goto out; + ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false); if (ret) - return ret; + goto out; if (new_size != i_size_read(inode)) scoutfs_inode_inc_data_version(inode); @@ -378,6 +386,7 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock, scoutfs_release_trans(sb); scoutfs_inode_index_unlock(sb, &ind_locks); +out: return ret; } diff --git a/tests/golden/xfstests b/tests/golden/xfstests index 9a818f512..b598b5007 100644 --- a/tests/golden/xfstests +++ b/tests/golden/xfstests @@ -45,9 +45,14 @@ generic/107 generic/117 generic/124 generic/129 +generic/130 generic/131 +generic/135 generic/169 generic/184 +generic/211 +generic/212 +generic/214 generic/221 generic/228 generic/236 @@ -100,13 +105,9 @@ generic/078 generic/079 generic/081 generic/082 -generic/091 -generic/094 generic/096 generic/110 generic/111 -generic/113 -generic/114 generic/115 generic/116 generic/118 @@ -115,9 +116,7 @@ generic/121 generic/122 generic/123 generic/128 -generic/130 generic/134 -generic/135 generic/136 generic/138 generic/139 @@ -165,7 +164,6 @@ generic/194 generic/195 generic/196 generic/197 -generic/198 generic/199 generic/200 generic/201 @@ -173,11 +171,6 @@ generic/202 generic/203 generic/205 generic/206 -generic/207 -generic/210 -generic/211 -generic/212 -generic/214 generic/216 generic/217 generic/218 @@ -185,13 +178,11 @@ generic/219 generic/220 generic/222 generic/223 -generic/225 generic/227 generic/229 generic/230 generic/235 generic/238 -generic/240 generic/244 generic/250 generic/252 @@ -203,7 +194,6 @@ generic/259 generic/260 generic/261 generic/262 -generic/263 generic/264 generic/265 generic/266 @@ -282,4 +272,4 @@ shared/004 shared/032 shared/051 shared/289 -Passed all 75 tests +Passed all 80 tests diff --git a/tests/tests/xfstests.sh b/tests/tests/xfstests.sh index 140a38191..0a4df47ff 100644 --- a/tests/tests/xfstests.sh +++ b/tests/tests/xfstests.sh @@ -64,19 +64,29 @@ generic/029 # mmap missing generic/030 # mmap missing generic/075 # file content mismatch failures (fds, etc) generic/080 # mmap missing +generic/091 # skip fsx tests +generic/094 # odirect streaming pre-alloc treated as failure in xfstests generic/103 # enospc causes trans commit failures generic/105 # needs trigage: something about acls generic/108 # mount fails on failing device? generic/112 # file content mismatch failures (fds, etc) +generic/113 # block aio dio runs +generic/114 # block aio dio runs generic/120 # (can't exec 'cause no mmap) generic/126 # (can't exec 'cause no mmap) generic/141 # mmap missing +generic/198 # block aio dio runs +generic/207 # block aio dio runs +generic/210 # block aio dio runs generic/213 # enospc causes trans commit failures generic/215 # mmap missing +generic/225 # odirect streaming pre-alloc treated as failure in xfstests generic/237 # wrong error return from failing setfacl? +generic/240 # block aio dio runs generic/246 # mmap missing generic/247 # mmap missing generic/248 # mmap missing +generic/263 # do not support allocate mode FALLOC_FL_PUNCH_HOLE, FALLOC_FL_KEEP_SIZE, FALLOC_FL_ZERO_RANGE... generic/319 # utils output change? update branch? generic/321 # requires selinux enabled for '+' in ls? generic/325 # mmap missing