From b1b6d29175e836ae9c578eb2e758c35f44043f5b Mon Sep 17 00:00:00 2001 From: jiefenghuang <155407896+jiefenghuang@users.noreply.github.com> Date: Mon, 15 Jan 2024 17:12:56 +0800 Subject: [PATCH] cmd/compact: add new compact cmd, used for specific path (#4337) Signed-off-by: jiefeng --- .gitignore | 33 +++++++---- cmd/compact_test.go | 118 +++++++++++++++++++++++++++++++++++++ cmd/compact_unix.go | 131 +++++++++++++++++++++++++++++++++++++++++ cmd/compact_windows.go | 38 ++++++++++++ cmd/main.go | 1 + go.mod | 6 +- go.sum | 5 +- pkg/meta/base.go | 44 ++++++++++++++ pkg/meta/context.go | 7 --- pkg/meta/interface.go | 22 +++++++ pkg/vfs/internal.go | 19 ++++++ 11 files changed, 404 insertions(+), 20 deletions(-) create mode 100644 cmd/compact_test.go create mode 100644 cmd/compact_unix.go create mode 100644 cmd/compact_windows.go diff --git a/.gitignore b/.gitignore index 88aa9df399b1..6dcf958fa451 100644 --- a/.gitignore +++ b/.gitignore @@ -5,26 +5,37 @@ ltmain.sh *.rej .deps .dirstamp -.vscode -.idea fstests/secfs.test fstests/flock !fstests/Makefile jfs -/juicefs -/juicefs.ceph -/juicefs.exe -/juicefs.lite -dist/ *.rdb .release-env *.so libjfs.h -.DS_Store docs/node_modules cmd/cmd -*.dump -*.out .hypothesis -__pycache__ /node_modules + +# os +.DS_Store + +# ide +.vscode +.idea + +# lang +__pycache__ + +# temp +pkg/meta/badger +*.dump +*.out + +# gen +/juicefs +/juicefs.ceph +/juicefs.exe +/juicefs.lite +dist/ diff --git a/cmd/compact_test.go b/cmd/compact_test.go new file mode 100644 index 000000000000..8a8bb3144ce4 --- /dev/null +++ b/cmd/compact_test.go @@ -0,0 +1,118 @@ +/* + * JuiceFS, Copyright 2024 Juicedata, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cmd
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
+)
+
+// createTestFile creates or truncates path and writes size/partCnt bytes to it
+// partCnt times, calling Sync after every write.
+func createTestFile(path string, size int, partCnt int) error {
+	file, err := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0666)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+	content := []byte(strings.Repeat("a", size/partCnt))
+	for i := 0; i < partCnt; i++ {
+		if _, err = file.Write(content); err != nil {
+			return err
+		}
+		if err = file.Sync(); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// testDir describes a directory to populate: fileCnt files, each created with createTestFile(..., fileSize, filePart).
+type testDir struct {
+	path     string
+	fileCnt  int
+	fileSize int
+	filePart int
+}
+
+// initForCompactTest creates each directory of dirs under mountDir with files named 0..fileCnt-1, panicking on error.
+func initForCompactTest(mountDir string, dirs map[string]testDir) {
+	for _, d := range dirs {
+		dirPath := filepath.Join(mountDir, d.path)
+		err := os.MkdirAll(dirPath, 0755)
+		if err != nil {
+			panic(err)
+		}
+		for i := 0; i < d.fileCnt; i++ {
+			if err := createTestFile(filepath.Join(dirPath, fmt.Sprintf("%d", i)), d.fileSize, d.filePart); err != nil {
+				panic(err)
+			}
+		}
+	}
+}
+
+// TestCompact checks the object count under the chunks directory before and after compacting each directory.
+func TestCompact(t *testing.T) {
+	logger.Level = logrus.DebugLevel
+	var bucket string
+	mountTemp(t, &bucket, []string{"--trash-days=0"}, nil)
+	defer umountTemp(t)
+
+	dirs := map[string]testDir{
+		"d1/d11": {
+			path:     "d1/d11",
+			fileCnt:  10,
+			fileSize: 10,
+			filePart: 2,
+		},
+		"d1": {
+			path:     "d1",
+			fileCnt:  20,
+			fileSize: 10,
+			filePart: 5,
+		},
+		"d2": {
+			path:     "d2",
+			fileCnt:  5,
+			fileSize: 20,
+			filePart: 4,
+		},
+	}
+	initForCompactTest(testMountPoint, dirs)
+	dataDir := filepath.Join(bucket, testVolume, "chunks")
+	sumChunks := 0
+	for _, d := range dirs {
+		sumChunks += d.fileCnt * d.filePart
+	}
+	chunkCnt := getFileCount(dataDir)
+	assert.Equal(t, sumChunks, chunkCnt)
+	// NOTE(review): map iteration order is random and "d1" contains "d1/d11"; if
+	// compact descends into subdirectories this bookkeeping needs d1/d11 first - confirm.
+	for _, d := range dirs {
+		err := Main([]string{"", "compact", filepath.Join(testMountPoint, d.path)})
+		assert.Nil(t, err)
+		chunkCnt = getFileCount(dataDir)
+		sumChunks -= d.fileCnt * (d.filePart - 1)
+		assert.Equal(t, sumChunks, chunkCnt)
+	}
+}
diff --git a/cmd/compact_unix.go b/cmd/compact_unix.go
new file mode 100644
index 000000000000..04f67366dcca
--- /dev/null
+++ b/cmd/compact_unix.go
@@ -0,0 +1,131 @@
+//go:build !windows
+// +build !windows
+
+/*
+ * JuiceFS, Copyright 2024 Juicedata, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cmd
+
+import (
+	"fmt"
+	"math"
+	"path/filepath"
+	"syscall"
+
+	"github.com/juicedata/juicefs/pkg/meta"
+	"github.com/juicedata/juicefs/pkg/utils"
+	"github.com/urfave/cli/v2"
+)
+
+// cmdCompact returns the definition of the "compact" CLI command.
+func cmdCompact() *cli.Command {
+	return &cli.Command{
+		Name:      "compact",
+		Action:    compact,
+		Category:  "TOOL",
+		Usage:     "Trigger compaction of chunks",
+		ArgsUsage: "PATH...",
+		Description: `
+ Examples:
+ # compact with path
+ $ juicefs compact /mnt/jfs/foo
+ `,
+		Flags: []cli.Flag{
+			&cli.UintFlag{
+				Name:    "threads",
+				Aliases: []string{"p"},
+				Value:   10,
+				Usage:   "compact concurrency",
+			},
+		},
+	}
+}
+
+// compact resolves every PATH argument to an inode and hands it to doCompact.
+// Lookup and compaction errors are logged and skipped, so it always returns nil.
+func compact(ctx *cli.Context) error {
+	setup(ctx, 1)
+	// Keep the worker count within 1..MaxUint16 so it fits the uint16 message field.
+	coCnt := ctx.Int("threads")
+	if coCnt <= 0 {
+		logger.Warn("threads should be > 0")
+		coCnt = 1
+	} else if coCnt >= math.MaxUint16 {
+		logger.Warn("threads should be < MaxUint16")
+		coCnt = math.MaxUint16
+	}
+
+	paths := ctx.Args().Slice()
+	for i := 0; i < len(paths); i++ {
+		path, err := filepath.Abs(paths[i])
+		if err != nil {
+			logger.Fatalf("get absolute path of %s error: %v", paths[i], err)
+		}
+		inodeNo, err := utils.GetFileInode(path)
+		if err != nil {
+			logger.Errorf("lookup inode for %s error: %v", path, err)
+			continue
+		}
+		inode := meta.Ino(inodeNo)
+		if !inode.IsValid() {
+			logger.Fatalf("inode number %d not valid", inode)
+		}
+		if err = doCompact(inode, path, uint16(coCnt)); err != nil {
+			logger.Error(err)
+		}
+	}
+	return nil
+}
+
+// doCompact writes a CompactPath message (inode, worker count) to the control
+// file opened for path and updates a progress bar from readProgress until the
+// reply arrives. A failed write or an EINVAL reply is reported with logger.Fatalf.
+func doCompact(inode meta.Ino, path string, coCnt uint16) error {
+	f, err := openController(path)
+	if err != nil {
+		return fmt.Errorf("open control file for [%d:%s]: %w", inode, path, err)
+	}
+	defer f.Close()
+	// Message: 8-byte header (command, body length), then inode (uint64) and workers (uint16).
+	headerLen, bodyLen := uint32(8), uint32(8+2)
+	wb := utils.NewBuffer(headerLen + bodyLen)
+	wb.Put32(meta.CompactPath)
+	wb.Put32(bodyLen)
+	wb.Put64(uint64(inode))
+	wb.Put16(coCnt)
+
+	_, err = f.Write(wb.Bytes())
+	if err != nil {
+		logger.Fatalf("write message: %s", err)
+	}
+
+	progress := utils.NewProgress(false)
+	bar := progress.AddCountBar("Compacted chunks", 0)
+	_, errno := readProgress(f, func(totalChunks, currChunks uint64) {
+		bar.SetTotal(int64(totalChunks))
+		bar.SetCurrent(int64(currChunks))
+	})
+	bar.Done()
+	progress.Done()
+	if errno == syscall.EINVAL {
+		logger.Fatalf("compact is not supported, please upgrade and mount again")
+	}
+	if errno != 0 {
+		return fmt.Errorf("compact [%d:%s] error: %s", inode, path, errno)
+	}
+	logger.Infof("compact [%d:%s] success.", inode, path)
+	return nil
+}
diff --git a/cmd/compact_windows.go b/cmd/compact_windows.go
new file mode 100644
index 000000000000..3a49757c10d7
--- /dev/null
+++ b/cmd/compact_windows.go
@@ -0,0 +1,42 @@
+//go:build windows
+// +build windows
+
+/*
+ * JuiceFS, Copyright 2024 Juicedata, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cmd
+
+import (
+	"github.com/urfave/cli/v2"
+)
+
+// cmdCompact returns the definition of the "compact" CLI command; on Windows
+// its action is only a stub.
+func cmdCompact() *cli.Command {
+	return &cli.Command{
+		Name:     "compact",
+		Action:   compact,
+		Category: "TOOL",
+		Usage:    "Trigger compaction of chunks, not supported for Windows",
+	}
+}
+
+// compact is the Windows stub: it logs a warning and returns nil without
+// compacting anything.
+func compact(ctx *cli.Context) error {
+	logger.Warnf("not supported for Windows.")
+	return nil
+}
diff --git a/cmd/main.go b/cmd/main.go
index 24a27678b9b4..741bba9c9a99 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -83,6 +83,7 @@ func Main(args []string) error {
 			cmdDebug(),
 			cmdClone(),
 			cmdSummary(),
+			cmdCompact(),
 		},
 	}
 
diff --git a/go.mod b/go.mod
index 2f7457c909a5..72a6182de07c 100644
--- a/go.mod
+++ b/go.mod
@@ -56,6 +56,7 @@ require (
 	github.com/redis/go-redis/v9 v9.0.2
 	github.com/sirupsen/logrus v1.9.0
 	github.com/smartystreets/goconvey v1.7.2
+	github.com/stretchr/testify v1.8.4
 	github.com/studio-b12/gowebdav v0.0.0-20230203202212-3282f94193f2
 	github.com/tencentyun/cos-go-sdk-v5 v0.7.45
 	github.com/tikv/client-go/v2 v2.0.4
@@ -112,6 +113,7 @@ require (
 	github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
 	github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect
+	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/dchest/siphash v1.2.1 // indirect
 	github.com/dgraph-io/ristretto v0.1.1 // indirect
 	github.com/dgrijalva/jwt-go v3.2.0+incompatible // indirect
@@ -172,7 +174,7 @@ require (
 	github.com/klauspost/reedsolomon v1.9.11 // indirect
 	github.com/kr/fs v0.1.0 // indirect
 	github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
-	github.com/mattn/go-colorable v0.1.12 // indirect
+	github.com/mattn/go-colorable v0.1.13 // indirect
 	github.com/mattn/go-runewidth v0.0.13 // indirect
 	github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
 	github.com/miekg/dns v1.1.41 // indirect
@@ -198,6 +200,7 @@ require (
 	github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect
github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 // indirect github.com/pingcap/kvproto v0.0.0-20221129023506-621ec37aac7a // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/pquerna/ffjson v0.0.0-20190930134022-aa0246cd15f7 // indirect github.com/prometheus/procfs v0.11.0 // indirect @@ -246,6 +249,7 @@ require ( gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect gopkg.in/square/go-jose.v2 v2.3.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect xorm.io/builder v0.3.7 // indirect ) diff --git a/go.sum b/go.sum index efe3d05b3102..0613725c12e3 100644 --- a/go.sum +++ b/go.sum @@ -693,8 +693,9 @@ github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaO github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= -github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40= github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= @@ -702,6 +703,7 @@ github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcME github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod 
h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= @@ -1362,6 +1364,7 @@ golang.org/x/sys v0.0.0-20220610221304-9f5ed59c137d/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220624220833-87e55d714810/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20221010170243-090e33056c14/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/pkg/meta/base.go b/pkg/meta/base.go index 4eb6a90f83ed..02b8cc0fa79a 100644 --- a/pkg/meta/base.go +++ b/pkg/meta/base.go @@ -2218,6 +2218,50 @@ func (m *baseMeta) CompactAll(ctx Context, threads int, bar *utils.Bar) syscall. 
return 0 } +func (m *baseMeta) Compact(ctx Context, inode Ino, concurrency int, preFunc, postFunc func()) syscall.Errno { + var attr Attr + if st := m.GetAttr(ctx, inode, &attr); st != 0 { + logger.Errorf("get attr error [inode %v]: %v", inode, st) + return st + } + + var wg sync.WaitGroup + // compact + chunkChan := make(chan cchunk, 10000) + for i := 0; i < concurrency; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for c := range chunkChan { + m.en.compactChunk(c.inode, c.indx, true) + postFunc() + } + }() + } + + // scan + st := m.walk(ctx, inode, "", &attr, func(ctx Context, fIno Ino, path string, fAttr *Attr) { + if fAttr.Typ != TypeFile { + return + } + // calc chunk index in local + chunkCnt := uint32((fAttr.Length + ChunkSize - 1) / ChunkSize) + for i := uint32(0); i < chunkCnt; i++ { + preFunc() + chunkChan <- cchunk{inode: fIno, indx: i} + } + }) + + // finish + close(chunkChan) + wg.Wait() + + if st != 0 { + logger.Errorf("walk error [inode %v]: %v", inode, st) + } + return st +} + func (m *baseMeta) fileDeleted(opened, force bool, inode Ino, length uint64) { if opened { m.Lock() diff --git a/pkg/meta/context.go b/pkg/meta/context.go index fbaa450be1c9..712d317836f5 100644 --- a/pkg/meta/context.go +++ b/pkg/meta/context.go @@ -18,15 +18,8 @@ package meta import ( "context" - "strconv" ) -type Ino uint64 - -func (i Ino) String() string { - return strconv.FormatUint(uint64(i), 10) -} - type CtxKey string type Context interface { diff --git a/pkg/meta/interface.go b/pkg/meta/interface.go index 973bb9384e9b..7a90518e2874 100644 --- a/pkg/meta/interface.go +++ b/pkg/meta/interface.go @@ -22,6 +22,7 @@ import ( "io" "net/url" "os" + "strconv" "strings" "sync/atomic" "syscall" @@ -52,6 +53,8 @@ const ( Clone = 1006 // OpSummary is a message to get tree summary of directories. 
 	OpSummary = 1007
+	// CompactPath is a message to trigger compact
+	CompactPath = 1008
 )
 
 const (
@@ -102,8 +105,28 @@ const (
 
 const MaxName = 255
 const MaxSymlink = 4096
+
+// Ino is an inode number.
+type Ino uint64
+
 const RootInode Ino = 1
 const TrashInode Ino = 0x7FFFFFFF10000000 // larger than vfs.minInternalNode
+
+// String formats the inode number in decimal.
+func (i Ino) String() string {
+	return strconv.FormatUint(uint64(i), 10)
+}
+
+// IsValid reports whether i is RootInode or greater.
+func (i Ino) IsValid() bool {
+	return i >= RootInode
+}
+
+// IsTrash reports whether i is TrashInode or greater.
+func (i Ino) IsTrash() bool {
+	return i >= TrashInode
+}
+
 var TrashName = ".trash"
 
 func isTrash(ino Ino) bool {
@@ -416,6 +439,10 @@ type Meta interface {
 	// Compact all the chunks by merge small slices together
 	CompactAll(ctx Context, threads int, bar *utils.Bar) syscall.Errno
 
+	// Compact compacts the chunks of the files found under inode using concurrency
+	// workers; preFunc is called as each chunk is queued and postFunc once it is done.
+	Compact(ctx Context, inode Ino, concurrency int, preFunc, postFunc func()) syscall.Errno
+
 	// ListSlices returns all slices used by all files.
 	ListSlices(ctx Context, slices map[Ino][]Slice, delete bool, showProgress func()) syscall.Errno
 	// Remove all files and directories recursively.
diff --git a/pkg/vfs/internal.go b/pkg/vfs/internal.go
index be3df660bb64..964674659e48 100644
--- a/pkg/vfs/internal.go
+++ b/pkg/vfs/internal.go
@@ -503,6 +503,28 @@ func (v *VFS) handleInternalMsg(ctx meta.Context, cmd uint32, r *utils.Buffer, o
 		w.Put32(uint32(len(data)))
 		w.Put(data)
 		_, _ = out.Write(w.Bytes())
+	case meta.CompactPath:
+		// Message body: inode (uint64) followed by the worker count (uint16).
+		inode := Ino(r.Get64())
+		coCnt := r.Get16()
+
+		// Compact runs in its own goroutine and bumps the two counters through its
+		// callbacks; done is closed when it returns.
+		done := make(chan struct{})
+		var totalChunks, currChunks uint64
+		var eno syscall.Errno
+		go func() {
+			eno = v.Meta.Compact(ctx, inode, int(coCnt), func() {
+				atomic.AddUint64(&totalChunks, 1)
+			}, func() {
+				atomic.AddUint64(&currChunks, 1)
+			})
+			close(done)
+		}()
+
+		writeProgress(&totalChunks, &currChunks, out, done)
+		_, _ = out.Write([]byte{uint8(eno)}) // last byte written is the errno
+
 	case meta.FillCache:
 		paths := strings.Split(string(r.Get(int(r.Get32()))), "\n")
 		concurrent := r.Get16()