diff --git a/go.mod b/go.mod index 8e5eca15fb01..6c5fb093759e 100644 --- a/go.mod +++ b/go.mod @@ -255,7 +255,7 @@ require ( replace github.com/minio/minio v0.0.0-20210206053228-97fe57bba92c => github.com/juicedata/minio v0.0.0-20231213085529-c243663574ba -replace github.com/hanwen/go-fuse/v2 v2.1.1-0.20210611132105-24a1dfe6b4f8 => github.com/juicedata/go-fuse/v2 v2.1.1-0.20230726081302-124dbfa991d7 +replace github.com/hanwen/go-fuse/v2 v2.1.1-0.20210611132105-24a1dfe6b4f8 => github.com/juicedata/go-fuse/v2 v2.1.1-0.20240202080323-002ef792942e replace github.com/dgrijalva/jwt-go v3.2.0+incompatible => github.com/golang-jwt/jwt v3.2.1+incompatible diff --git a/go.sum b/go.sum index c8ca53d478f3..12193292fea6 100644 --- a/go.sum +++ b/go.sum @@ -622,8 +622,8 @@ github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7 github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/juicedata/cli/v2 v2.19.4-0.20230605075551-9c9c5c0dce83 h1:RyHTka3jCnTaUqfRYjlwcQlr53aasmkvHEbYLXthqr8= github.com/juicedata/cli/v2 v2.19.4-0.20230605075551-9c9c5c0dce83/go.mod h1:1CNUng3PtjQMtRzJO4FMXBQvkGtuYRxxiR9xMa7jMwI= -github.com/juicedata/go-fuse/v2 v2.1.1-0.20230726081302-124dbfa991d7 h1:4evzoVz1/AZfk9tqxWdzVYTMl2dC7VjEJHfaSFDrKS8= -github.com/juicedata/go-fuse/v2 v2.1.1-0.20230726081302-124dbfa991d7/go.mod h1:B1nGE/6RBFyBRC1RRnf23UpwCdyJ31eukw34oAKukAc= +github.com/juicedata/go-fuse/v2 v2.1.1-0.20240202080323-002ef792942e h1:Oj9V2Losi2fVgVjQ4my0zDbw4F+Ry3VOnfTRCiWwy7Q= +github.com/juicedata/go-fuse/v2 v2.1.1-0.20240202080323-002ef792942e/go.mod h1:B1nGE/6RBFyBRC1RRnf23UpwCdyJ31eukw34oAKukAc= github.com/juicedata/go-nfs-client v0.0.0-20231018052507-dbca444fa7e8 h1:mVVipCbohnzKZPiHGzFncLKEJZpypqjpGr4End2PP48= github.com/juicedata/go-nfs-client v0.0.0-20231018052507-dbca444fa7e8/go.mod h1:xOMqi3lOrcGe9uZLnSzgaq94Vc3oz6VPCNDLJUnXpKs= github.com/juicedata/godaemon v0.0.0-20210629045518-3da5144a127d h1:kpQMvNZJKGY3PTt7OSoahYc4nM0HY67SvK0YyS0GLwA= diff --git a/pkg/acl/cache.go b/pkg/acl/cache.go index d39fbc82b1f9..bdd726c4d2dc 100644 --- a/pkg/acl/cache.go +++ b/pkg/acl/cache.go @@ -33,7 +33,8 @@ type Cache interface { Get(id uint32) *Rule GetId(r *Rule) uint32 Size() int - GetMissIds(maxId uint32) []uint32 + GetMissIds() []uint32 + Clear() } func NewCache() Cache { @@ -52,30 +53,27 @@ type cache struct { cksum2Id map[uint32][]uint32 } -// GetMissIds return all miss ids from 1 to max(maxId, c.maxId) -func (c *cache) GetMissIds(maxId uint32) []uint32 { +func (c *cache) Clear() { + c.lock.Lock() + defer c.lock.Unlock() + c.maxId = None + c.id2Rule = make(map[uint32]*Rule) + c.cksum2Id = make(map[uint32][]uint32) +} + +// GetMissIds return all miss ids from 1 to c.maxId +func (c *cache) GetMissIds() []uint32 { c.lock.RLock() defer c.lock.RUnlock() - if c.maxId == maxId && uint32(len(c.id2Rule)) == maxId { + if uint32(len(c.id2Rule)) == c.maxId { return nil } - if c.maxId > maxId { - maxId = c.maxId - } - - n := maxId + 1 - mark := make([]bool, n) - for i := uint32(1); i < n; i++ { - if _, ok := c.id2Rule[i]; ok { - mark[i] = true - } - } - + n := c.maxId + 1 var ret []uint32 for i := uint32(1); i < n; i++ { - if !mark[i] { + if _, ok := c.id2Rule[i]; !ok { ret = append(ret, i) } } diff --git a/pkg/acl/cache_test.go b/pkg/acl/cache_test.go index 760d5d529185..5dc267f731c3 100644 --- a/pkg/acl/cache_test.go +++ b/pkg/acl/cache_test.go @@ -66,8 +66,7 @@ func TestCache(t *testing.T) { assert.Equal(t, uint32(3), c.GetId(rule2)) c.Put(8, rule2) - assert.Equal(t, []uint32{4, 5, 6, 7, 9, 10}, c.GetMissIds(10)) - assert.Equal(t, []uint32{4, 5, 6, 7}, c.GetMissIds(6)) + assert.Equal(t, []uint32{4, 5, 6, 7}, c.GetMissIds()) assert.NotPanics(t, func() { c.Put(10, nil) diff --git a/pkg/fuse/fuse.go b/pkg/fuse/fuse.go index 79d920688172..20758f45218c 100644 --- a/pkg/fuse/fuse.go +++ b/pkg/fuse/fuse.go @@ -224,7 +224,7 @@ func (fs *fileSystem) RemoveXAttr(cancel <-chan struct{}, header *fuse.InHeader, func (fs *fileSystem) Create(cancel <-chan struct{}, in *fuse.CreateIn, name string, out *fuse.CreateOut) (code fuse.Status) { ctx := fs.newContext(cancel, &in.InHeader) defer releaseContext(ctx) - entry, fh, err := fs.v.Create(ctx, Ino(in.NodeId), name, uint16(in.Mode), 0, in.Flags) + entry, fh, err := fs.v.Create(ctx, Ino(in.NodeId), name, uint16(in.Mode), getCreateUmask(in), in.Flags) if err != 0 { return fuse.Status(err) } @@ -447,6 +447,7 @@ func Serve(v *vfs.VFS, options string, xattrs, ioctl bool) error { opt.MaxBackground = 50 opt.EnableLocks = true opt.EnableAcl = conf.Format.EnableACL + opt.DontUmask = conf.Format.EnableACL opt.DisableXAttrs = !xattrs opt.EnableIoctl = ioctl opt.MaxWrite = 1 << 20 diff --git a/pkg/fuse/fuse_darwin.go b/pkg/fuse/fuse_darwin.go index db7fa1fc7e44..d6bc4cae8154 100644 --- a/pkg/fuse/fuse_darwin.go +++ b/pkg/fuse/fuse_darwin.go @@ -24,5 +24,9 @@ func getUmask(in *fuse.MknodIn) uint16 { return 0 } +func getCreateUmask(in *fuse.CreateIn) uint16 { + return 0 +} + func setBlksize(out *fuse.Attr, size uint32) { } diff --git a/pkg/fuse/fuse_linux.go b/pkg/fuse/fuse_linux.go index d6988f9a06e2..171b89403a31 100644 --- a/pkg/fuse/fuse_linux.go +++ b/pkg/fuse/fuse_linux.go @@ -20,6 +20,10 @@ import ( "github.com/hanwen/go-fuse/v2/fuse" ) +func getCreateUmask(in *fuse.CreateIn) uint16 { + return uint16(in.Umask) +} + func getUmask(in *fuse.MknodIn) uint16 { return uint16(in.Umask) } diff --git a/pkg/meta/base.go b/pkg/meta/base.go index 02b8cc0fa79a..4e7ae31495da 100644 --- a/pkg/meta/base.go +++ b/pkg/meta/base.go @@ -32,6 +32,7 @@ import ( "time" "github.com/dustin/go-humanize" + aclAPI "github.com/juicedata/juicefs/pkg/acl" "github.com/juicedata/juicefs/pkg/utils" "github.com/juicedata/juicefs/pkg/version" "github.com/pkg/errors" @@ -112,6 +113,10 @@ type engine interface { scanPendingFiles(Context, pendingFileScan) error GetSession(sid uint64, detail bool) (*Session, error) + + doSetFacl(ctx Context, ino Ino, aclType uint8, n *aclAPI.Rule) syscall.Errno + doGetFacl(ctx Context, ino Ino, aclType uint8, n *aclAPI.Rule) syscall.Errno + cacheACLs(ctx Context) error } type trashSliceScan func(ss []Slice, ts int64) (clean bool, err error) @@ -162,6 +167,7 @@ type baseMeta struct { reloadCb []func(*Format) umounting bool sesMu sync.Mutex + aclCache aclAPI.Cache dirStatsLock sync.Mutex dirStats map[Ino]dirStat @@ -202,6 +208,7 @@ func newBaseMeta(addr string, conf *Config) *baseMeta { msgCallbacks: &msgCallbacks{ callbacks: make(map[uint32]MsgCallback), }, + aclCache: aclAPI.NewCache(), usedSpaceG: prometheus.NewGauge(prometheus.GaugeOpts{ Name: "used_space", @@ -477,6 +484,13 @@ func (m *baseMeta) newSessionInfo() []byte { func (m *baseMeta) NewSession(record bool) error { go m.refresh() + + if m.getFormat().EnableACL { + if err := m.en.cacheACLs(Background); err != nil { + return err + } + } + if m.conf.ReadOnly { logger.Infof("Create read-only session OK with version: %s", version.Version()) return nil @@ -1164,11 +1178,19 @@ func (m *baseMeta) parseAttr(buf []byte, attr *Attr) { attr.Parent = Ino(rb.Get64()) } attr.Full = true + if rb.Left() >= 8 { + attr.AccessACL = rb.Get32() + attr.DefaultACL = rb.Get32() + } logger.Tracef("attr: %+v -> %+v", buf, attr) } func (m *baseMeta) marshal(attr *Attr) []byte { - w := utils.NewBuffer(36 + 24 + 4 + 8) + size := uint32(36 + 24 + 4 + 8) + if attr.AccessACL|attr.DefaultACL != aclAPI.None { + size += 8 + } + w := utils.NewBuffer(size) w.Put8(attr.Flags) w.Put16((uint16(attr.Typ) << 12) | (attr.Mode & 0xfff)) w.Put32(attr.Uid) @@ -1183,6 +1205,10 @@ func (m *baseMeta) marshal(attr *Attr) []byte { w.Put64(attr.Length) w.Put32(attr.Rdev) w.Put64(uint64(attr.Parent)) + if attr.AccessACL+attr.DefaultACL > 0 { + w.Put32(attr.AccessACL) + w.Put32(attr.DefaultACL) + } logger.Tracef("attr: %+v -> %+v", attr, w.Bytes()) return w.Bytes() } @@ -2696,7 +2722,7 @@ LOOP: return eno } -func (m *baseMeta) mergeAttr(ctx Context, inode Ino, set uint16, cur, attr *Attr, now time.Time) (*Attr, syscall.Errno) { +func (m *baseMeta) mergeAttr(ctx Context, inode Ino, set uint16, cur, attr *Attr, now time.Time, rule *aclAPI.Rule) (*Attr, syscall.Errno) { dirtyAttr := *cur if (set&(SetAttrUID|SetAttrGID)) != 0 && (set&SetAttrMode) != 0 { attr.Mode |= (cur.Mode & 06000) @@ -2731,7 +2757,12 @@ func (m *baseMeta) mergeAttr(ctx Context, inode Ino, set uint16, cur, attr *Attr attr.Mode &= 05777 } } - if attr.Mode != cur.Mode { + + if rule != nil { + rule.SetMode(attr.Mode) + dirtyAttr.Mode = attr.Mode&07000 | rule.GetMode() + changed = true + } else if attr.Mode != cur.Mode { if ctx.Uid() != 0 && ctx.Uid() != cur.Uid && (cur.Mode&01777 != attr.Mode&01777 || attr.Mode&02000 > cur.Mode&02000 || attr.Mode&04000 > cur.Mode&04000) { return nil, syscall.EPERM @@ -2793,6 +2824,89 @@ func (m *baseMeta) CheckSetAttr(ctx Context, inode Ino, set uint16, attr Attr) s if st := m.en.doGetAttr(ctx, inode, &cur); st != 0 { return st } - _, st := m.mergeAttr(ctx, inode, set, &cur, &attr, time.Now()) + _, st := m.mergeAttr(ctx, inode, set, &cur, &attr, time.Now(), nil) return st } + +var errACLNotInCache = errors.New("acl not in cache") + +func (m *baseMeta) getFaclFromCache(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) error { + ino = m.checkRoot(ino) + cAttr := &Attr{} + if m.conf.OpenCache > 0 && m.of.Check(ino, cAttr) { + aclId := getAttrACLId(cAttr, aclType) + if aclId == aclAPI.None { + return ENOATTR + } + + if cRule := m.aclCache.Get(aclId); cRule != nil { + *rule = *cRule + return nil + } + } + return errACLNotInCache +} + +func setAttrACLId(attr *Attr, aclType uint8, id uint32) { + switch aclType { + case aclAPI.TypeAccess: + attr.AccessACL = id + case aclAPI.TypeDefault: + attr.DefaultACL = id + } +} + +func getAttrACLId(attr *Attr, aclType uint8) uint32 { + switch aclType { + case aclAPI.TypeAccess: + return attr.AccessACL + case aclAPI.TypeDefault: + return attr.DefaultACL + } + return aclAPI.None +} + +func setXAttrACL(xattrs *[]byte, accessACL, defaultACL uint32) { + if accessACL != aclAPI.None { + *xattrs = append(*xattrs, []byte("system.posix_acl_access")...) + *xattrs = append(*xattrs, 0) + } + if defaultACL != aclAPI.None { + *xattrs = append(*xattrs, []byte("system.posix_acl_default")...) + *xattrs = append(*xattrs, 0) + } +} + +func (m *baseMeta) SetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno { + if aclType != aclAPI.TypeAccess && aclType != aclAPI.TypeDefault { + return syscall.EINVAL + } + + if !ino.IsNormal() { + return syscall.EPERM + } + + now := time.Now() + defer func() { + m.timeit("SetFacl", now) + m.of.InvalidateChunk(ino, invalidateAttrOnly) + }() + + return m.en.doSetFacl(ctx, ino, aclType, rule) +} + +func (m *baseMeta) GetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno { + var err error + if err = m.getFaclFromCache(ctx, ino, aclType, rule); err == nil { + return 0 + } + + if !errors.Is(err, errACLNotInCache) { + return errno(err) + } + + now := time.Now() + defer m.timeit("GetFacl", now) + + return m.en.doGetFacl(ctx, ino, aclType, rule) +} diff --git a/pkg/meta/base_test.go b/pkg/meta/base_test.go index c8ed8e546849..72268c620fbe 100644 --- a/pkg/meta/base_test.go +++ b/pkg/meta/base_test.go @@ -34,9 +34,12 @@ import ( "testing" "time" + "xorm.io/xorm" + + aclAPI "github.com/juicedata/juicefs/pkg/acl" "github.com/juicedata/juicefs/pkg/utils" "github.com/redis/go-redis/v9" - "xorm.io/xorm" + "github.com/stretchr/testify/assert" ) func testConfig() *Config { @@ -146,10 +149,194 @@ func testMeta(t *testing.T, m Meta) { testCheckAndRepair(t, m) testDirStat(t, m) testClone(t, m) + testACL(t, m) base.conf.ReadOnly = true testReadOnly(t, m) } +func testACL(t *testing.T, m Meta) { + format := testFormat() + format.EnableACL = true + + if err := m.Init(format, false); err != nil { + t.Fatalf("test acl failed: %s", err) + } + + ctx := Background + testDir := "test_dir" + var testDirIno Ino + attr1 := &Attr{} + + if st := m.Mkdir(ctx, RootInode, testDir, 0644, 0, 0, &testDirIno, attr1); st != 0 { + t.Fatalf("create %s: %s", testDir, st) + } + defer m.Rmdir(ctx, RootInode, testDir) + + rule := &aclAPI.Rule{ + Owner: 7, + Group: 7, + Mask: 7, + Other: 7, + NamedUsers: []aclAPI.Entry{ + { + Id: 1001, + Perm: 4, + }, + }, + NamedGroups: nil, + } + + // case: setfacl + if st := m.SetFacl(ctx, testDirIno, aclAPI.TypeAccess, rule); st != 0 { + t.Fatalf("setfacl error: %s", st) + } + + // case: getfacl + rule2 := &aclAPI.Rule{} + if st := m.GetFacl(ctx, testDirIno, aclAPI.TypeAccess, rule2); st != 0 { + t.Fatalf("getfacl error: %s", st) + } + assert.True(t, rule.IsEqual(rule2)) + + // case: setfacl will sync mode (group class is mask) + attr2 := &Attr{} + if st := m.GetAttr(ctx, testDirIno, attr2); st != 0 { + t.Fatalf("getattr error: %s", st) + } + assert.Equal(t, uint16(0777), attr2.Mode) + + // case: setattr will sync acl + set := uint16(0) | SetAttrMode + attr2 = &Attr{ + Mode: 0555, + } + if st := m.SetAttr(ctx, testDirIno, set, 0, attr2); st != 0 { + t.Fatalf("setattr error: %s", st) + } + + rule3 := &aclAPI.Rule{} + if st := m.GetFacl(ctx, testDirIno, aclAPI.TypeAccess, rule3); st != 0 { + t.Fatalf("getfacl error: %s", st) + } + rule2.Owner = 5 + rule2.Mask = 5 + rule2.Other = 5 + assert.True(t, rule3.IsEqual(rule2)) + + // case: remove acl + rule3.Mask = 0xFFFF + rule3.NamedUsers = nil + rule3.NamedGroups = nil + if st := m.SetFacl(ctx, testDirIno, aclAPI.TypeAccess, rule3); st != 0 { + t.Fatalf("setattr error: %s", st) + } + + st := m.GetFacl(ctx, testDirIno, aclAPI.TypeAccess, nil) + assert.Equal(t, ENOATTR, st) + + attr2 = &Attr{} + if st := m.GetAttr(ctx, testDirIno, attr2); st != 0 { + t.Fatalf("getattr error: %s", st) + } + assert.Equal(t, uint16(0575), attr2.Mode) + + // case: set normal default acl + if st := m.SetFacl(ctx, testDirIno, aclAPI.TypeDefault, rule); st != 0 { + t.Fatalf("setfacl error: %s", st) + } + + // case: get normal default acl + rule2 = &aclAPI.Rule{} + if st := m.GetFacl(ctx, testDirIno, aclAPI.TypeDefault, rule2); st != 0 { + t.Fatalf("getfacl error: %s", st) + } + assert.True(t, rule2.IsEqual(rule)) + + // case: mk subdir with normal default acl + subDir := "sub_dir" + var subDirIno Ino + attr2 = &Attr{} + + mode := uint16(0222) + // cumask will be ignored + if st := m.Mkdir(ctx, testDirIno, subDir, mode, 0022, 0, &subDirIno, attr2); st != 0 { + t.Fatalf("create %s: %s", subDir, st) + } + defer m.Rmdir(ctx, testDirIno, subDir) + + // subdir inherit default acl + rule3 = &aclAPI.Rule{} + if st := m.GetFacl(ctx, subDirIno, aclAPI.TypeDefault, rule3); st != 0 { + t.Fatalf("getfacl error: %s", st) + } + assert.True(t, rule3.IsEqual(rule2)) + + // subdir access acl + rule3 = &aclAPI.Rule{} + if st := m.GetFacl(ctx, subDirIno, aclAPI.TypeAccess, rule3); st != 0 { + t.Fatalf("getfacl error: %s", st) + } + rule2.Owner &= (mode >> 6) & 7 + rule2.Mask &= (mode >> 3) & 7 + rule2.Other &= mode & 7 + assert.True(t, rule3.IsEqual(rule2)) + + // case: set minimal default acl + rule = &aclAPI.Rule{ + Owner: 5, + Group: 5, + Mask: 0xFFFF, + Other: 5, + NamedUsers: nil, + NamedGroups: nil, + } + if st := m.SetFacl(ctx, testDirIno, aclAPI.TypeDefault, rule); st != 0 { + t.Fatalf("setfacl error: %s", st) + } + + // case: get minimal default acl + rule2 = &aclAPI.Rule{} + if st := m.GetFacl(ctx, testDirIno, aclAPI.TypeDefault, rule2); st != 0 { + t.Fatalf("getfacl error: %s", st) + } + assert.True(t, rule2.IsEqual(rule)) + + // case: mk subdir with minimal default acl + subDir2 := "sub_dir2" + var subDirIno2 Ino + attr2 = &Attr{} + + mode = uint16(0222) + if st := m.Mkdir(ctx, testDirIno, subDir2, mode, 0022, 0, &subDirIno2, attr2); st != 0 { + t.Fatalf("create %s: %s", subDir, st) + } + defer m.Rmdir(ctx, testDirIno, subDir2) + + // subdir inherit default acl + rule3 = &aclAPI.Rule{} + if st := m.GetFacl(ctx, subDirIno2, aclAPI.TypeDefault, rule3); st != 0 { + t.Fatalf("getfacl error: %s", st) + } + assert.True(t, rule3.IsEqual(rule2)) + + // subdir have no access acl + rule3 = &aclAPI.Rule{} + st = m.GetFacl(ctx, subDirIno2, aclAPI.TypeAccess, rule3) + assert.Equal(t, ENOATTR, st) + + attr2 = &Attr{} + if st := m.GetAttr(ctx, subDirIno2, attr2); st != 0 { + t.Fatalf("getattr error: %s", st) + } + assert.Equal(t, rule.GetMode(), attr2.Mode) + + // test cache all + sz := m.getBase().aclCache.Size() + err := m.getBase().en.cacheACLs(ctx) + assert.Nil(t, err) + assert.Equal(t, sz, m.getBase().aclCache.Size()) +} + func testMetaClient(t *testing.T, m Meta) { m.OnMsg(DeleteSlice, func(args ...interface{}) error { return nil }) ctx := Background diff --git a/pkg/meta/interface.go b/pkg/meta/interface.go index ca88fe9f5479..eec25ca610c7 100644 --- a/pkg/meta/interface.go +++ b/pkg/meta/interface.go @@ -28,6 +28,7 @@ import ( "syscall" "time" + aclAPI "github.com/juicedata/juicefs/pkg/acl" "github.com/juicedata/juicefs/pkg/utils" "github.com/prometheus/client_golang/prometheus" ) @@ -123,6 +124,10 @@ func (i Ino) IsTrash() bool { return i >= TrashInode } +func (i Ino) IsNormal() bool { + return i >= RootInode && i < TrashInode +} + var TrashName = ".trash" func isTrash(ino Ino) bool { @@ -161,6 +166,9 @@ type Attr struct { Parent Ino // inode of parent; 0 means tracked by parentKey (for hardlinks) Full bool // the attributes are completed or not KeepCache bool // whether to keep the cached page or not + + AccessACL uint32 // access ACL id (identical ACL rules share the same access ACL ID.) + DefaultACL uint32 // default ACL id (default ACL and the access ACL share the same cache and store) } func typeToStatType(_type uint8) uint32 { @@ -474,6 +482,9 @@ type Meta interface { // getBase return the base engine. getBase() *baseMeta InitMetrics(registerer prometheus.Registerer) + + SetFacl(ctx Context, ino Ino, aclType uint8, n *aclAPI.Rule) syscall.Errno + GetFacl(ctx Context, ino Ino, aclType uint8, n *aclAPI.Rule) syscall.Errno } type Creator func(driver, addr string, conf *Config) (Meta, error) diff --git a/pkg/meta/redis.go b/pkg/meta/redis.go index 053fa5565e95..0ca2f016256f 100644 --- a/pkg/meta/redis.go +++ b/pkg/meta/redis.go @@ -44,9 +44,9 @@ import ( "syscall" "time" - "github.com/pkg/errors" - + aclAPI "github.com/juicedata/juicefs/pkg/acl" "github.com/juicedata/juicefs/pkg/utils" + "github.com/pkg/errors" "github.com/redis/go-redis/v9" ) @@ -91,6 +91,7 @@ type redisMeta struct { } var _ Meta = &redisMeta{} +var _ engine = &redisMeta{} func init() { Register("redis", newRedisMeta) @@ -321,6 +322,22 @@ func (m *redisMeta) doInit(format *Format, force bool) error { return m.rdb.Set(ctx, m.inodeKey(1), m.marshal(attr), 0).Err() } +func (m *redisMeta) cacheACLs(ctx Context) error { + // cache all acls + vals, err := m.rdb.HGetAll(ctx, m.aclKey()).Result() + if err != nil { + return err + } + + for k, v := range vals { + id, _ := strconv.ParseUint(k, 10, 32) + tmpRule := &aclAPI.Rule{} + tmpRule.Decode([]byte(v)) + m.aclCache.Put(uint32(id), tmpRule) + } + return nil +} + func (m *redisMeta) Reset() error { if m.prefix != "" { return m.scan(Background, "*", func(keys []string) error { @@ -605,6 +622,10 @@ func (m *redisMeta) totalInodesKey() string { return m.prefix + totalInodes } +func (m *redisMeta) aclKey() string { + return m.prefix + "acl" +} + func (m *redisMeta) delfiles() string { return m.prefix + "delfiles" } @@ -799,11 +820,22 @@ func (m *redisMeta) Resolve(ctx Context, parent Ino, path string, inode *Ino, at } func (m *redisMeta) doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno { - a, err := m.rdb.Get(ctx, m.inodeKey(inode)).Bytes() - if err == nil { - m.parseAttr(a, attr) - } - return errno(err) + return errno(m.rdb.Watch(ctx, func(tx *redis.Tx) error { + val, err := tx.Get(ctx, m.inodeKey(inode)).Bytes() + if err != nil { + return err + } + m.parseAttr(val, attr) + + if attr != nil && attr.AccessACL != aclAPI.None { + rule, err := m.getACL(ctx, tx, attr.AccessACL) + if err != nil { + return err + } + attr.Mode = (rule.GetMode() & 0777) | (attr.Mode & 07000) + } + return nil + }, m.inodeKey(inode))) } type timeoutError interface { @@ -1141,13 +1173,39 @@ func (m *redisMeta) doSetAttr(ctx Context, inode Ino, set uint16, sugidclearmode return syscall.EPERM } now := time.Now() - dirtyAttr, st := m.mergeAttr(ctx, inode, set, &cur, attr, now) + + // get acl + var rule *aclAPI.Rule + if cur.AccessACL != aclAPI.None { + oldRule, err := m.getACL(ctx, tx, cur.AccessACL) + if err != nil { + return err + } + rule = &aclAPI.Rule{} + *rule = *oldRule + } + + dirtyAttr, st := m.mergeAttr(ctx, inode, set, &cur, attr, now, rule) if st != 0 { return st } if dirtyAttr == nil { return nil } + + // set acl + if rule != nil { + if err = m.tryLoadMissACLs(ctx, tx); err != nil { + logger.Warnf("SetAttr: load miss acls error: %s", err) + } + + aclId, err := m.insertACL(ctx, tx, rule) + if err != nil { + return err + } + setAttrACLId(dirtyAttr, aclAPI.TypeAccess, aclId) + } + dirtyAttr.Ctime = now.Unix() dirtyAttr.Ctimensec = uint32(now.Nanosecond()) _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { @@ -1216,7 +1274,6 @@ func (m *redisMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, m attr = &Attr{} } attr.Typ = _type - attr.Mode = mode & ^cumask attr.Uid = ctx.Uid() attr.Gid = ctx.Gid() if _type == TypeDirectory { @@ -1287,6 +1344,40 @@ func (m *redisMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, m return syscall.EEXIST } + mode &= 07777 + if pattr.DefaultACL != aclAPI.None && _type != TypeSymlink { + // inherit default acl + if _type == TypeDirectory { + attr.DefaultACL = pattr.DefaultACL + } + + // set access acl by parent's default acl + rule, err := m.getACL(ctx, tx, pattr.DefaultACL) + if err != nil { + return err + } + + if rule.IsMinimal() { + // simple acl as default + attr.Mode = (mode & 0xFE00) | rule.GetMode() + } else { + if err = m.tryLoadMissACLs(ctx, tx); err != nil { + logger.Warnf("Mknode: load miss acls error: %s", err) + } + + cRule := rule.ChildAccessACL(mode) + id, err := m.insertACL(ctx, tx, cRule) + if err != nil { + return err + } + + attr.AccessACL = id + attr.Mode = (mode & 0xFE00) | cRule.GetMode() + } + } else { + attr.Mode = mode & ^cumask + } + var updateParent bool now := time.Now() if parent != TrashInode { @@ -3441,6 +3532,14 @@ func (m *redisMeta) ListXattr(ctx Context, inode Ino, names *[]byte) syscall.Err *names = append(*names, []byte(name)...) *names = append(*names, 0) } + + val, err := m.rdb.Get(ctx, m.inodeKey(inode)).Bytes() + if err != nil { + return errno(err) + } + attr := &Attr{} + m.parseAttr(val, attr) + setXAttrACL(names, attr.AccessACL, attr.DefaultACL) return 0 } @@ -4448,3 +4547,160 @@ func (m *redisMeta) doTouchAtime(ctx Context, inode Ino, attr *Attr, now time.Ti }, m.inodeKey(inode)) return updated, err } + +func (m *redisMeta) doSetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno { + return errno(m.txn(ctx, func(tx *redis.Tx) error { + val, err := tx.Get(ctx, m.inodeKey(ino)).Bytes() + if err != nil { + return err + } + attr := &Attr{} + m.parseAttr(val, attr) + + if ctx.Uid() != 0 && ctx.Uid() != attr.Uid { + return syscall.EPERM + } + + if attr.Flags&FlagImmutable != 0 { + return syscall.EPERM + } + + oriACL, oriMode := getAttrACLId(attr, aclType), attr.Mode + if rule.IsEmpty() { + // remove acl + setAttrACLId(attr, aclType, aclAPI.None) + } else if rule.IsMinimal() && aclType == aclAPI.TypeAccess { + // remove acl + setAttrACLId(attr, aclType, aclAPI.None) + // set mode + attr.Mode &= 07000 + attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Group & 7) << 3) | (rule.Other & 7) + } else { + if err = m.tryLoadMissACLs(ctx, tx); err != nil { + logger.Warnf("SetFacl: load miss acls error: %s", err) + } + + // set acl + rule.InheritPerms(attr.Mode) + aclId, err := m.insertACL(ctx, tx, rule) + if err != nil { + return err + } + setAttrACLId(attr, aclType, aclId) + + // set mode + if aclType == aclAPI.TypeAccess { + attr.Mode &= 07000 + attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Mask & 7) << 3) | (rule.Other & 7) + } + } + + // update attr + if oriACL != getAttrACLId(attr, aclType) || oriMode != attr.Mode { + now := time.Now() + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.Set(ctx, m.inodeKey(ino), m.marshal(attr), 0) + return nil + }) + return err + } + return nil + }, m.inodeKey(ino))) +} + +func (m *redisMeta) doGetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno { + return errno(m.rdb.Watch(ctx, func(tx *redis.Tx) error { + val, err := tx.Get(ctx, m.inodeKey(ino)).Bytes() + if err != nil { + return err + } + attr := &Attr{} + m.parseAttr(val, attr) + m.of.Update(ino, attr) + + aclId := getAttrACLId(attr, aclType) + if aclId == aclAPI.None { + return ENOATTR + } + + a, err := m.getACL(ctx, tx, aclId) + if err != nil { + return err + } + *rule = *a + return nil + }, m.inodeKey(ino))) +} + +func (m *redisMeta) getACL(ctx Context, tx *redis.Tx, id uint32) (*aclAPI.Rule, error) { + if cRule := m.aclCache.Get(id); cRule != nil { + return cRule, nil + } + + cmds, err := tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error { + pipe.HGet(ctx, m.aclKey(), strconv.FormatUint(uint64(id), 10)) + return nil + }) + if err != nil { + return nil, err + } + + val, err := cmds[0].(*redis.StringCmd).Bytes() + if err != nil { + return nil, err + } + if val == nil { + return nil, ENOATTR + } + + rule := &aclAPI.Rule{} + rule.Decode(val) + m.aclCache.Put(id, rule) + return rule, nil +} + +func (m *redisMeta) insertACL(ctx Context, tx *redis.Tx, rule *aclAPI.Rule) (uint32, error) { + var aclId uint32 + if aclId = m.aclCache.GetId(rule); aclId == aclAPI.None { + // TODO failures may result in some id wastage. + newId, err := m.incrCounter(aclCounter, 1) + if err != nil { + return aclAPI.None, err + } + aclId = uint32(newId) + + if err = tx.HSet(ctx, m.aclKey(), strconv.FormatUint(uint64(aclId), 10), rule.Encode()).Err(); err != nil { + return aclAPI.None, err + } + m.aclCache.Put(aclId, rule) + } + return aclId, nil +} + +func (m *redisMeta) tryLoadMissACLs(ctx Context, tx *redis.Tx) error { + // try load miss + missIds := m.aclCache.GetMissIds() + if len(missIds) > 0 { + missKeys := make([]string, len(missIds)) + for i, id := range missIds { + missKeys[i] = strconv.FormatUint(uint64(id), 10) + } + + vals, err := tx.HMGet(ctx, m.aclKey(), missKeys...).Result() + if err != nil { + return err + } + for i, data := range vals { + var rule *aclAPI.Rule + if data != nil { + rule = &aclAPI.Rule{} + rule.Decode(data.([]byte)) + } + // may have empty slot + m.aclCache.Put(missIds[i], rule) + } + } + return nil +} diff --git a/pkg/meta/sql.go b/pkg/meta/sql.go index 3a93bb9b9d35..489e7b29a418 100644 --- a/pkg/meta/sql.go +++ b/pkg/meta/sql.go @@ -39,12 +39,13 @@ import ( "xorm.io/xorm/log" "xorm.io/xorm/names" + aclAPI "github.com/juicedata/juicefs/pkg/acl" "github.com/juicedata/juicefs/pkg/utils" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) -const MaxFieldsCountOfTable = 16 // node table +const MaxFieldsCountOfTable = 18 // node table type setting struct { Name string `xorm:"pk"` @@ -65,22 +66,67 @@ type edge struct { } type node struct { - Inode Ino `xorm:"pk"` - Type uint8 `xorm:"notnull"` - Flags uint8 `xorm:"notnull"` - Mode uint16 `xorm:"notnull"` - Uid uint32 `xorm:"notnull"` - Gid uint32 `xorm:"notnull"` - Atime int64 `xorm:"notnull"` - Mtime int64 `xorm:"notnull"` - Ctime int64 `xorm:"notnull"` - Atimensec int16 `xorm:"notnull default 0"` - Mtimensec int16 `xorm:"notnull default 0"` - Ctimensec int16 `xorm:"notnull default 0"` - Nlink uint32 `xorm:"notnull"` - Length uint64 `xorm:"notnull"` - Rdev uint32 - Parent Ino + Inode Ino `xorm:"pk"` + Type uint8 `xorm:"notnull"` + Flags uint8 `xorm:"notnull"` + Mode uint16 `xorm:"notnull"` + Uid uint32 `xorm:"notnull"` + Gid uint32 `xorm:"notnull"` + Atime int64 `xorm:"notnull"` + Mtime int64 `xorm:"notnull"` + Ctime int64 `xorm:"notnull"` + Atimensec int16 `xorm:"notnull default 0"` + Mtimensec int16 `xorm:"notnull default 0"` + Ctimensec int16 `xorm:"notnull default 0"` + Nlink uint32 `xorm:"notnull"` + Length uint64 `xorm:"notnull"` + Rdev uint32 + Parent Ino + AccessACLId uint32 `xorm:"'access_acl_id'"` + DefaultACLId uint32 `xorm:"'default_acl_id'"` +} + +func getACLIdColName(aclType uint8) string { + switch aclType { + case aclAPI.TypeAccess: + return "access_acl_id" + case aclAPI.TypeDefault: + return "default_acl_id" + } + return "" +} + +type acl struct { + Id uint32 `xorm:"pk autoincr"` + Owner uint16 + Group uint16 + Mask uint16 + Other uint16 + NamedUsers []byte + NamedGroups []byte +} + +func newSQLAcl(r *aclAPI.Rule) *acl { + a := &acl{ + Owner: r.Owner, + Group: r.Group, + Mask: r.Mask, + Other: r.Other, + } + a.NamedUsers = r.NamedUsers.Encode() + a.NamedGroups = r.NamedGroups.Encode() + return a +} + +func (a *acl) toRule() *aclAPI.Rule { + r := &aclAPI.Rule{} + r.Owner = a.Owner + r.Group = a.Group + r.Other = a.Other + r.Mask = a.Mask + r.NamedUsers.Decode(a.NamedUsers) + r.NamedGroups.Decode(a.NamedGroups) + return r } type namedNode struct { @@ -191,6 +237,9 @@ type dbMeta struct { noReadOnlyTxn bool } +var _ Meta = &dbMeta{} +var _ engine = &dbMeta{} + type dbSnap struct { node map[Ino]*node symlink map[Ino]*symlink @@ -326,6 +375,9 @@ func (m *dbMeta) doInit(format *Format, force bool) error { if err := m.syncTable(new(detachedNode)); err != nil { return fmt.Errorf("create table detachedNode: %s", err) } + if err := m.syncTable(new(acl)); err != nil { + return fmt.Errorf("create table acl: %s", err) + } var s = setting{Name: "format"} var ok bool @@ -414,12 +466,26 @@ func (m *dbMeta) doInit(format *Format, force bool) error { }) } +func (m *dbMeta) cacheACLs(ctx Context) error { + return m.roTxn(func(s *xorm.Session) error { + // cache all acls + var acls []acl + if err := s.Find(&acls); err != nil { + return err + } + for _, val := range acls { + m.aclCache.Put(val.Id, val.toRule()) + } + return nil + }) +} + func (m *dbMeta) Reset() error { return m.db.DropTables(&setting{}, &counter{}, &node{}, &edge{}, &symlink{}, &xattr{}, &chunk{}, &sliceRef{}, &delslices{}, &session{}, &session2{}, &sustained{}, &delfile{}, - &flock{}, &plock{}, &dirStats{}, &dirQuota{}, &detachedNode{}) + &flock{}, &plock{}, &dirStats{}, &dirQuota{}, &detachedNode{}, &acl{}) } func (m *dbMeta) doLoad() (data []byte, err error) { @@ -441,7 +507,7 @@ func (m *dbMeta) doLoad() (data []byte, err error) { func (m *dbMeta) doNewSession(sinfo []byte) error { // add new table - err := m.syncTable(new(session2), new(delslices), new(dirStats), new(detachedNode), new(dirQuota)) + err := m.syncTable(new(session2), new(delslices), new(dirStats), new(detachedNode), new(dirQuota), new(acl)) if err != nil { return fmt.Errorf("update table session2, delslices, dirstats, detachedNode, dirQuota: %s", err) } @@ -814,6 +880,8 @@ func (m *dbMeta) parseAttr(n *node, attr *Attr) { attr.Rdev = n.Rdev attr.Parent = n.Parent attr.Full = true + attr.AccessACL = n.AccessACLId + attr.DefaultACL = n.DefaultACLId } func (m *dbMeta) parseNode(attr *Attr, n *node) { @@ -835,6 +903,8 @@ func (m *dbMeta) parseNode(attr *Attr, n *node) { n.Length = attr.Length n.Rdev = attr.Rdev n.Parent = attr.Parent + n.AccessACLId = attr.AccessACL + n.DefaultACLId = attr.DefaultACL } func (m *dbMeta) updateStats(space int64, inodes int64) { @@ -897,12 +967,21 @@ func (m *dbMeta) doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno { return errno(m.roTxn(func(s *xorm.Session) error { var n = node{Inode: inode} ok, err := s.Get(&n) - if ok { - m.parseAttr(&n, attr) - } else if err == nil { - err = syscall.ENOENT + if err != nil { + return err + } else if !ok { + return syscall.ENOENT } - return err + m.parseAttr(&n, attr) + + if attr != nil && attr.AccessACL != aclAPI.None { + rule, err := m.getACL(s, attr.AccessACL) + if err != nil { + return err + } + attr.Mode = (rule.GetMode() & 0777) | (attr.Mode & 07000) + } + return nil })) } @@ -922,18 +1001,45 @@ func (m *dbMeta) doSetAttr(ctx Context, inode Ino, set uint16, sugidclearmode ui return syscall.EPERM } now := time.Now() - dirtyAttr, st := m.mergeAttr(ctx, inode, set, &curAttr, attr, now) + + // get acl + var rule *aclAPI.Rule + if curAttr.AccessACL != aclAPI.None { + oldRule, err := m.getACL(s, curAttr.AccessACL) + if err != nil { + return err + } + rule = &aclAPI.Rule{} + *rule = *oldRule + } + + dirtyAttr, st := m.mergeAttr(ctx, inode, set, &curAttr, attr, now, rule) if st != 0 { return st } if dirtyAttr == nil { return nil } + + // set acl + if rule != nil { + if err = m.tryLoadMissACLs(s); err != nil { + logger.Warnf("SetAttr: load miss acls error: %s", err) + } + + aclId, err := m.insertACL(s, rule) + if err != nil { + return err + } + setAttrACLId(dirtyAttr, aclAPI.TypeAccess, aclId) + } + var dirtyNode node m.parseNode(dirtyAttr, &dirtyNode) dirtyNode.Ctime = now.UnixNano() / 1e3 dirtyNode.Ctimensec = int16(now.Nanosecond() % 1000) - _, err = s.Cols("flags", "mode", "uid", "gid", "atime", "mtime", "ctime", "atimensec", "mtimensec", "ctimensec"). + _, err = s.Cols("flags", "mode", "uid", "gid", "atime", "mtime", "ctime", + "atimensec", "mtimensec", "ctimensec", "access_acl_id", "default_acl_id"). Update(&dirtyNode, &node{Inode: inode}) if err == nil { m.parseAttr(&dirtyNode, attr) @@ -1210,7 +1316,6 @@ func (m *dbMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, mode var n node n.Inode = ino n.Type = _type - n.Mode = mode & ^cumask n.Uid = ctx.Uid() n.Gid = ctx.Gid() if _type == TypeDirectory { @@ -1285,6 +1390,40 @@ func (m *dbMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, mode return syscall.EEXIST } + mode &= 07777 + if pattr.DefaultACL != aclAPI.None && _type != TypeSymlink { + // inherit default acl + if _type == TypeDirectory { + n.DefaultACLId = pattr.DefaultACL + } + + // set access acl by parent's default acl + rule, err := m.getACL(s, pattr.DefaultACL) + if err != nil { + return err + } + + if rule.IsMinimal() { + // simple acl as default + n.Mode = (mode & 0xFE00) | rule.GetMode() + } else { + if err = m.tryLoadMissACLs(s); err != nil { + logger.Warnf("Mknode: load miss acls error: %s", err) + } + + cRule := rule.ChildAccessACL(mode) + id, err := m.insertACL(s, cRule) + if err != nil { + return err + } + + n.AccessACLId = id + n.Mode = (mode & 0xFE00) | cRule.GetMode() + } + } else { + n.Mode = mode & ^cumask + } + var updateParent bool now := time.Now().UnixNano() if parent != TrashInode { @@ -3181,6 +3320,17 @@ func (m *dbMeta) ListXattr(ctx Context, inode Ino, names *[]byte) syscall.Errno *names = append(*names, []byte(x.Name)...) *names = append(*names, 0) } + + var n = node{Inode: inode} + ok, err := s.Get(&n) + if err != nil { + return err + } else if !ok { + return syscall.ENOENT + } + attr := &Attr{} + m.parseAttr(&n, attr) + setXAttrACL(names, attr.AccessACL, attr.DefaultACL) return nil })) } @@ -3810,6 +3960,9 @@ func (m *dbMeta) LoadMeta(r io.Reader) error { if err := m.syncTable(new(detachedNode)); err != nil { return fmt.Errorf("create table detachedNode: %s", err) } + if err = m.syncTable(new(acl)); err != nil { + return fmt.Errorf("create table acl: %s", err) + } var batch int switch m.Name() { case "sqlite3": @@ -4168,3 +4321,148 @@ func (m *dbMeta) doTouchAtime(ctx Context, inode Ino, attr *Attr, now time.Time) }, inode) return updated, err } + +func (m *dbMeta) insertACL(s *xorm.Session, rule *aclAPI.Rule) (uint32, error) { + var aclId uint32 + if aclId = m.aclCache.GetId(rule); aclId == aclAPI.None { + // TODO conflicts from multiple clients are rare and result in only minor duplicates, thus not addressed for now. + val := newSQLAcl(rule) + if _, err := s.Insert(val); err != nil { + return aclAPI.None, err + } + aclId = val.Id + m.aclCache.Put(aclId, rule) + } + return aclId, nil +} + +func (m *dbMeta) tryLoadMissACLs(s *xorm.Session) error { + missIds := m.aclCache.GetMissIds() + if len(missIds) > 0 { + var acls []acl + if err := s.In("id", missIds).Find(&acls); err != nil { + return err + } + + for _, data := range acls { + m.aclCache.Put(data.Id, data.toRule()) + } + } + return nil +} + +func (m *dbMeta) getACL(s *xorm.Session, id uint32) (*aclAPI.Rule, error) { + if cRule := m.aclCache.Get(id); cRule != nil { + return cRule, nil + } + + var aclVal = &acl{Id: id} + if ok, err := s.Get(aclVal); err != nil { + return nil, err + } else if !ok { + return nil, ENOATTR + } + + r := aclVal.toRule() + m.aclCache.Put(id, r) + return r, nil +} + +func (m *dbMeta) doSetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno { + return errno(m.txn(func(s *xorm.Session) error { + attr := &Attr{} + n := &node{Inode: ino} + if ok, err := s.ForUpdate().Get(n); err != nil { + return err + } else if !ok { + return syscall.ENOENT + } + m.parseAttr(n, attr) + + if ctx.Uid() != 0 && ctx.Uid() != attr.Uid { + return syscall.EPERM + } + + if attr.Flags&FlagImmutable != 0 { + return syscall.EPERM + } + + oriACL, oriMode := getAttrACLId(attr, aclType), attr.Mode + if rule.IsEmpty() { + // remove acl + setAttrACLId(attr, aclType, aclAPI.None) + } else if rule.IsMinimal() && aclType == aclAPI.TypeAccess { + // remove acl + setAttrACLId(attr, aclType, aclAPI.None) + // set mode + attr.Mode &= 07000 + attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Group & 7) << 3) | (rule.Other & 7) + } else { + if err := m.tryLoadMissACLs(s); err != nil { + logger.Warnf("SetFacl: load miss acls error: %s", err) + } + + // set acl + rule.InheritPerms(attr.Mode) + aclId, err := m.insertACL(s, rule) + if err != nil { + return err + } + setAttrACLId(attr, aclType, aclId) + + // set mode + if aclType == aclAPI.TypeAccess { + attr.Mode &= 07000 + attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Mask & 7) << 3) | (rule.Other & 7) + } + } + + // update attr + var updateCols []string + if oriACL != getAttrACLId(attr, aclType) { + updateCols = append(updateCols, getACLIdColName(aclType)) + } + if oriMode != attr.Mode { + updateCols = append(updateCols, "mode") + } + if len(updateCols) > 0 { + updateCols = append(updateCols, "ctime", "ctimensec") + + var dirtyNode node + m.parseNode(attr, &dirtyNode) + now := time.Now() + dirtyNode.Ctime = now.UnixNano() / 1e3 + dirtyNode.Ctimensec = int16(now.Nanosecond() % 1000) + _, err := s.Cols(updateCols...).Update(&dirtyNode, &node{Inode: ino}) + return err + } + + return nil + }, ino)) +} + +func (m *dbMeta) doGetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno { + return errno(m.roTxn(func(s *xorm.Session) error { + attr := &Attr{} + n := &node{Inode: ino} + if ok, err := s.Get(n); err != nil { + return err + } else if !ok { + return syscall.ENOENT + } + m.parseAttr(n, attr) + m.of.Update(ino, attr) + + aclId := getAttrACLId(attr, aclType) + if aclId == aclAPI.None { + return ENOATTR + } + + a, err := m.getACL(s, aclId) + if err != nil { + return err + } + *rule = *a + return nil + })) +} diff --git a/pkg/meta/tkv.go b/pkg/meta/tkv.go index f24348fa2011..f7c2a69329b4 100644 --- a/pkg/meta/tkv.go +++ b/pkg/meta/tkv.go @@ -34,6 +34,7 @@ import ( "syscall" "time" + aclAPI "github.com/juicedata/juicefs/pkg/acl" "github.com/pkg/errors" "github.com/juicedata/juicefs/pkg/utils" @@ -79,6 +80,9 @@ type kvMeta struct { snap map[Ino]*DumpedEntry } +var _ Meta = &kvMeta{} +var _ engine = &kvMeta{} + var drivers = make(map[string]func(string) (tkvClient, error)) func newTkvClient(driver, addr string) (tkvClient, error) { @@ -165,6 +169,7 @@ func (m *kvMeta) fmtKey(args ...interface{}) []byte { name ... sliceId cccccccc session ssssssss + aclId aaaa All keys: setting format @@ -187,6 +192,7 @@ All keys: Uiiiiiiii data length, space and inodes usage in directory Niiiiiiii detached inde QDiiiiiiii directory quota + Raaaa POSIX acl */ func (m *kvMeta) inodeKey(inode Ino) []byte { @@ -249,6 +255,16 @@ func (m *kvMeta) dirQuotaKey(inode Ino) []byte { return m.fmtKey("QD", inode) } +func (m *kvMeta) aclKey(id uint32) []byte { + return m.fmtKey("R", id) +} + +func (m *kvMeta) parseACLId(key string) uint32 { + // trim "R" + rb := utils.ReadBuffer([]byte(key[1:])) + return rb.Get32() +} + func (m *kvMeta) parseSid(key string) uint64 { buf := []byte(key[2:]) // "SE" or "SH" if len(buf) != 8 { @@ -472,6 +488,20 @@ func (m *kvMeta) doInit(format *Format, force bool) error { }) } +func (m *kvMeta) cacheACLs(ctx Context) error { + // cache all acls + acls, err := m.scanValues(m.fmtKey("R"), -1, nil) + if err != nil { + return err + } + for key, val := range acls { + tmpRule := &aclAPI.Rule{} + tmpRule.Decode(val) + m.aclCache.Put(m.parseACLId(key), tmpRule) + } + return nil +} + func (m *kvMeta) Reset() error { return m.client.reset(nil) } @@ -854,13 +884,22 @@ func (m *kvMeta) doLookup(ctx Context, parent Ino, name string, inode *Ino, attr } func (m *kvMeta) doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno { - a, err := m.get(m.inodeKey(inode)) - if a != nil { - m.parseAttr(a, attr) - } else if err == nil { - err = syscall.ENOENT - } - return errno(err) + return errno(m.client.txn(func(tx *kvTxn) error { + val := tx.get(m.inodeKey(inode)) + if val == nil { + return syscall.ENOENT + } + m.parseAttr(val, attr) + + if attr != nil && attr.AccessACL != aclAPI.None { + rule, err := m.getACL(tx, attr.AccessACL) + if err != nil { + return err + } + attr.Mode = (rule.GetMode() & 0777) | (attr.Mode & 07000) + } + return nil + }, 0)) } func (m *kvMeta) doSetAttr(ctx Context, inode Ino, set uint16, sugidclearmode uint8, attr *Attr) syscall.Errno { @@ -875,13 +914,40 @@ func (m *kvMeta) doSetAttr(ctx Context, inode Ino, set uint16, sugidclearmode ui return syscall.EPERM } now := time.Now() - dirtyAttr, st := m.mergeAttr(ctx, inode, set, &cur, attr, now) + + // get acl + var rule *aclAPI.Rule + if cur.AccessACL != aclAPI.None { + var err error + oldRule, err := m.getACL(tx, cur.AccessACL) + if err != nil { + return err + } + rule = &aclAPI.Rule{} + *rule = *oldRule + } + + dirtyAttr, st := m.mergeAttr(ctx, inode, set, &cur, attr, now, rule) if st != 0 { return st } if dirtyAttr == nil { return nil } + + // set acl + if rule != nil { + if err := m.tryLoadMissACLs(tx); err != nil { + logger.Warnf("SetAttr: load miss acls error: %s", err) + } + + aclId, err := m.insertACL(tx, rule) + if err != nil { + return err + } + setAttrACLId(dirtyAttr, aclAPI.TypeAccess, aclId) + } + dirtyAttr.Ctime = now.Unix() dirtyAttr.Ctimensec = uint32(now.Nanosecond()) tx.set(m.inodeKey(inode), m.marshal(dirtyAttr)) @@ -1105,7 +1171,6 @@ func (m *kvMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, mode attr = &Attr{} } attr.Typ = _type - attr.Mode = mode & ^cumask attr.Uid = ctx.Uid() attr.Gid = ctx.Gid() if _type == TypeDirectory { @@ -1171,6 +1236,40 @@ func (m *kvMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, mode return syscall.EEXIST } + mode &= 07777 + if pattr.DefaultACL != aclAPI.None && _type != TypeSymlink { + // inherit default acl + if _type == TypeDirectory { + attr.DefaultACL = pattr.DefaultACL + } + + // set access acl by parent's default acl + rule, err := m.getACL(tx, pattr.DefaultACL) + if err != nil { + return err + } + + if rule.IsMinimal() { + // simple acl as default + attr.Mode = (mode & 0xFE00) | rule.GetMode() + } else { + if err = m.tryLoadMissACLs(tx); err != nil { + logger.Warnf("Mknode: load miss acls error: %s", err) + } + + cRule := rule.ChildAccessACL(mode) + id, err := m.insertACL(tx, cRule) + if err != nil { + return err + } + + attr.AccessACL = id + attr.Mode = (mode & 0xFE00) | cRule.GetMode() + } + } else { + attr.Mode = mode & ^cumask + } + var updateParent bool now := time.Now() if parent != TrashInode { @@ -2744,6 +2843,17 @@ func (m *kvMeta) ListXattr(ctx Context, inode Ino, names *[]byte) syscall.Errno *names = append(*names, name[prefix:]...) *names = append(*names, 0) } + + val, err := m.get(m.inodeKey(inode)) + if err != nil { + return errno(err) + } + if val == nil { + return syscall.ENOENT + } + attr := &Attr{} + m.parseAttr(val, attr) + setXAttrACL(names, attr.AccessACL, attr.DefaultACL) return 0 } @@ -3587,3 +3697,134 @@ func (m *kvMeta) doTouchAtime(ctx Context, inode Ino, attr *Attr, now time.Time) }, inode) return updated, err } + +func (m *kvMeta) doSetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno { + return errno(m.txn(func(tx *kvTxn) error { + val := tx.get(m.inodeKey(ino)) + if val == nil { + return syscall.ENOENT + } + attr := &Attr{} + m.parseAttr(val, attr) + + if ctx.Uid() != 0 && ctx.Uid() != attr.Uid { + return syscall.EPERM + } + + if attr.Flags&FlagImmutable != 0 { + return syscall.EPERM + } + + oriACL, oriMode := getAttrACLId(attr, aclType), attr.Mode + if rule.IsEmpty() { + // remove acl + setAttrACLId(attr, aclType, aclAPI.None) + } else if rule.IsMinimal() && aclType == aclAPI.TypeAccess { + // remove acl + setAttrACLId(attr, aclType, aclAPI.None) + // set mode + attr.Mode &= 07000 + attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Group & 7) << 3) | (rule.Other & 7) + } else { + if err := m.tryLoadMissACLs(tx); err != nil { + logger.Warnf("SetFacl: load miss acls error: %s", err) + } + + // set acl + rule.InheritPerms(attr.Mode) + aclId, err := m.insertACL(tx, rule) + if err != nil { + return err + } + setAttrACLId(attr, aclType, aclId) + + // set mode + if aclType == aclAPI.TypeAccess { + attr.Mode &= 07000 + attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Mask & 7) << 3) | (rule.Other & 7) + } + } + + // update attr + if oriACL != getAttrACLId(attr, aclType) || oriMode != attr.Mode { + now := time.Now() + attr.Ctime = now.Unix() + attr.Ctimensec = uint32(now.Nanosecond()) + tx.set(m.inodeKey(ino), m.marshal(attr)) + } + return nil + }, ino)) +} + +func (m *kvMeta) doGetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno { + return errno(m.client.txn(func(tx *kvTxn) error { + val := tx.get(m.inodeKey(ino)) + if val == nil { + return syscall.ENOENT + } + attr := &Attr{} + m.parseAttr(val, attr) + m.of.Update(ino, attr) + + aclId := getAttrACLId(attr, aclType) + if aclId == aclAPI.None { + return ENOATTR + } + + a, err := m.getACL(tx, aclId) + if err != nil { + return err + } + *rule = *a + return nil + }, 0)) +} + +func (m *kvMeta) insertACL(tx *kvTxn, rule *aclAPI.Rule) (uint32, error) { + var aclId uint32 + if aclId = m.aclCache.GetId(rule); aclId == aclAPI.None { + newId, err := m.incrCounter(aclCounter, 1) + if err != nil { + return aclAPI.None, err + } + aclId = uint32(newId) + + tx.set(m.aclKey(aclId), rule.Encode()) + m.aclCache.Put(aclId, rule) + } + return aclId, nil +} + +func (m *kvMeta) tryLoadMissACLs(tx *kvTxn) error { + missIds := m.aclCache.GetMissIds() + if len(missIds) > 0 { + missKeys := make([][]byte, len(missIds)) + for i, id := range missIds { + missKeys[i] = m.aclKey(id) + } + + acls := tx.gets(missKeys...) + for i, data := range acls { + r := &aclAPI.Rule{} + r.Decode(data) + m.aclCache.Put(missIds[i], r) + } + } + return nil +} + +func (m *kvMeta) getACL(tx *kvTxn, id uint32) (*aclAPI.Rule, error) { + if cRule := m.aclCache.Get(id); cRule != nil { + return cRule, nil + } + + val := tx.get(m.aclKey(id)) + if val == nil { + return nil, ENOATTR + } + + rule := &aclAPI.Rule{} + rule.Decode(val) + m.aclCache.Put(id, rule) + return rule, nil +} diff --git a/pkg/meta/utils.go b/pkg/meta/utils.go index 639e6b11f94f..8df768c202d8 100644 --- a/pkg/meta/utils.go +++ b/pkg/meta/utils.go @@ -34,6 +34,7 @@ import ( ) const ( + aclCounter = "aclMaxId" usedSpace = "usedSpace" totalInodes = "totalInodes" legacySessions = "sessions" diff --git a/pkg/vfs/vfs.go b/pkg/vfs/vfs.go index 3e571b072d5b..dae1ab304bef 100644 --- a/pkg/vfs/vfs.go +++ b/pkg/vfs/vfs.go @@ -24,6 +24,7 @@ import ( "syscall" "time" + "github.com/juicedata/juicefs/pkg/acl" "github.com/juicedata/juicefs/pkg/chunk" "github.com/juicedata/juicefs/pkg/meta" "github.com/juicedata/juicefs/pkg/utils" @@ -901,11 +902,23 @@ func (v *VFS) SetXattr(ctx Context, ino Ino, name string, value []byte, flags ui err = syscall.EINVAL return } - if name == "system.posix_acl_access" || name == "system.posix_acl_default" { - err = syscall.ENOTSUP - return + + aclType := GetACLType(name) + if aclType != acl.TypeNone { + if !v.Conf.Format.EnableACL { + err = syscall.ENOTSUP + return + } + + var rule *acl.Rule + rule, err = decodeACL(value) + if err != 0 { + return + } + err = v.Meta.SetFacl(ctx, ino, aclType, rule) + } else { + err = v.Meta.SetXattr(ctx, ino, name, value, flags) } - err = v.Meta.SetXattr(ctx, ino, name, value, flags) return } @@ -927,11 +940,22 @@ func (v *VFS) GetXattr(ctx Context, ino Ino, name string, size uint32) (value [] err = syscall.EINVAL return } - if name == "system.posix_acl_access" || name == "system.posix_acl_default" { - err = syscall.ENOTSUP - return + + aclType := GetACLType(name) + if aclType != acl.TypeNone { + if !v.Conf.Format.EnableACL { + err = syscall.ENOTSUP + return + } + + rule := &acl.Rule{} + if err = v.Meta.GetFacl(ctx, ino, aclType, rule); err != 0 { + return nil, err + } + value = encodeACL(rule) + } else { + err = v.Meta.GetXattr(ctx, ino, name, &value) } - err = v.Meta.GetXattr(ctx, ino, name, &value) if size > 0 && len(value) > int(size) { err = syscall.ERANGE } @@ -957,9 +981,6 @@ func (v *VFS) RemoveXattr(ctx Context, ino Ino, name string) (err syscall.Errno) err = syscall.EPERM return } - if name == "system.posix_acl_access" || name == "system.posix_acl_default" { - return syscall.ENOTSUP - } if len(name) > xattrMaxName { if runtime.GOOS == "darwin" { err = syscall.EPERM @@ -972,7 +993,18 @@ func (v *VFS) RemoveXattr(ctx Context, ino Ino, name string) (err syscall.Errno) err = syscall.EINVAL return } - err = v.Meta.RemoveXattr(ctx, ino, name) + + aclType := GetACLType(name) + if aclType != acl.TypeNone { + if !v.Conf.Format.EnableACL { + err = syscall.ENOTSUP + return + } + err = v.Meta.SetFacl(ctx, ino, aclType, acl.EmptyRule()) + } else { + err = v.Meta.RemoveXattr(ctx, ino, name) + } + return } @@ -1123,3 +1155,102 @@ func InitMetrics(registerer prometheus.Registerer) { registerer.MustRegister(opsDurationsHistogram) registerer.MustRegister(compactSizeHistogram) } + +// Linux ACL format: +// +// version:8 (2) +// flags:8 (0) +// filler:16 +// N * [ tag:16 perm:16 id:32 ] +// tag: +// 01 - user +// 02 - named user +// 04 - group +// 08 - named group +// 10 - mask +// 20 - other + +func encodeACL(n *acl.Rule) []byte { + length := 4 + 24 + uint32(len(n.NamedUsers)+len(n.NamedGroups))*8 + if n.Mask != 0xFFFF { + length += 8 + } + buff := make([]byte, length) + w := utils.NewNativeBuffer(buff) + w.Put8(acl.Version) // version + w.Put8(0) // flag + w.Put16(0) // filler + wRule := func(tag, perm uint16, id uint32) { + w.Put16(tag) + w.Put16(perm) + w.Put32(id) + } + wRule(1, n.Owner, 0xFFFFFFFF) + for _, rule := range n.NamedUsers { + wRule(2, rule.Perm, rule.Id) + } + wRule(4, n.Group, 0xFFFFFFFF) + for _, rule := range n.NamedGroups { + wRule(8, rule.Perm, rule.Id) + } + if n.Mask != 0xFFFF { + wRule(0x10, n.Mask, 0xFFFFFFFF) + } + wRule(0x20, n.Other, 0xFFFFFFFF) + return buff +} + +func decodeACL(buff []byte) (*acl.Rule, syscall.Errno) { + length := len(buff) + if length < 4 || ((length % 8) != 4) || buff[0] != acl.Version { + return nil, syscall.EINVAL + } + + n := acl.EmptyRule() + r := utils.NewNativeBuffer(buff[4:]) + for r.HasMore() { + tag := r.Get16() + perm := r.Get16() + id := r.Get32() + switch tag { + case 1: + if n.Owner != 0xFFFF { + return nil, syscall.EINVAL + } + n.Owner = perm + case 2: + n.NamedUsers = append(n.NamedUsers, acl.Entry{Id: id, Perm: perm}) + case 4: + if n.Group != 0xFFFF { + return nil, syscall.EINVAL + } + n.Group = perm + case 8: + n.NamedGroups = append(n.NamedGroups, acl.Entry{Id: id, Perm: perm}) + case 0x10: + if n.Mask != 0xFFFF { + return nil, syscall.EINVAL + } + n.Mask = perm + case 0x20: + if n.Other != 0xFFFF { + return nil, syscall.EINVAL + } + n.Other = perm + } + } + if n.Mask == 0xFFFF && len(n.NamedUsers)+len(n.NamedGroups) > 0 { + return nil, syscall.EINVAL + } + return n, 0 +} + +func GetACLType(name string) uint8 { + switch name { + case "system.posix_acl_access": + return acl.TypeAccess + case "system.posix_acl_default": + return acl.TypeDefault + } + return acl.TypeNone +}