diff --git a/cmd/dump.go b/cmd/dump.go index 796afedacdcf..b14924c5df18 100644 --- a/cmd/dump.go +++ b/cmd/dump.go @@ -25,6 +25,7 @@ import ( "github.com/DataDog/zstd" "github.com/juicedata/juicefs/pkg/meta" + "github.com/juicedata/juicefs/pkg/utils" "github.com/urfave/cli/v2" ) @@ -33,10 +34,11 @@ func cmdDump() *cli.Command { Name: "dump", Action: dump, Category: "ADMIN", - Usage: "Dump metadata into a JSON file", + Usage: "Dump metadata into a file", ArgsUsage: "META-URL [FILE]", Description: ` -Dump metadata of the volume in JSON format so users are able to see its content in an easy way. +Supports two formats: JSON format and binary format. +1. Dump metadata of the volume in JSON format so users are able to see its content in an easy way. Output of this command can be loaded later into an empty database, serving as a method to backup metadata or to change metadata engine. @@ -47,6 +49,11 @@ $ juicefs dump redis://localhost meta-dump.json.gz # Dump only a subtree of the volume to STDOUT $ juicefs dump redis://localhost --subdir /dir/in/jfs +2. Binary format is more compact, faster, and memory-efficient. 
+ +Examples: +$ juicefs dump redis://localhost meta-dump.bin --binary + Details: https://juicefs.com/docs/community/metadata_dump_load`, Flags: []cli.Flag{ &cli.StringFlag{ @@ -64,17 +71,21 @@ Details: https://juicefs.com/docs/community/metadata_dump_load`, }, &cli.BoolFlag{ Name: "fast", - Usage: "speedup dump by load all metadata into memory", + Usage: "speedup dump by load all metadata into memory (only works with JSON format and DB/KV engine)", }, &cli.BoolFlag{ Name: "skip-trash", Usage: "skip files in trash", }, + &cli.BoolFlag{ + Name: "binary", + Usage: "dump metadata into a binary file (different from original JSON format, subdir/fast/skip-trash will be ignored)", + }, }, } } -func dumpMeta(m meta.Meta, dst string, threads int, keepSecret, fast, skipTrash bool) (err error) { +func dumpMeta(m meta.Meta, dst string, threads int, keepSecret, fast, skipTrash, isBinary bool) (err error) { var w io.WriteCloser if dst == "" { w = os.Stdout @@ -107,6 +118,23 @@ func dumpMeta(m meta.Meta, dst string, threads int, keepSecret, fast, skipTrash w = fp } } + if isBinary { + progress := utils.NewProgress(false) + defer progress.Done() + + bars := make(map[string]*utils.Bar) + for _, name := range meta.SegType2Name { + bars[name] = progress.AddCountSpinner(name) + } + + return m.DumpMetaV2(meta.Background(), w, &meta.DumpOption{ + KeepSecret: keepSecret, + Threads: threads, + Progress: func(name string, cnt int) { + bars[name].IncrBy(cnt) + }, + }) + } return m.DumpMeta(w, 1, threads, keepSecret, fast, skipTrash) } @@ -134,7 +162,8 @@ func dump(ctx *cli.Context) error { logger.Warnf("Invalid threads number %d, reset to 1", threads) threads = 1 } - err := dumpMeta(m, dst, threads, ctx.Bool("keep-secret-key"), ctx.Bool("fast"), ctx.Bool("skip-trash")) + + err := dumpMeta(m, dst, threads, ctx.Bool("keep-secret-key"), ctx.Bool("fast"), ctx.Bool("skip-trash"), ctx.Bool("binary")) if err == nil { if dst == "" { dst = "STDOUT" diff --git a/cmd/load.go b/cmd/load.go index 
6e65a8697850..ae799c53b77e 100644 --- a/cmd/load.go +++ b/cmd/load.go @@ -20,6 +20,7 @@ import ( "compress/gzip" "crypto/x509" "encoding/pem" + "errors" "fmt" "io" "os" @@ -28,6 +29,7 @@ import ( "github.com/DataDog/zstd" "github.com/juicedata/juicefs/pkg/object" + "github.com/olekukonko/tablewriter" "github.com/juicedata/juicefs/pkg/meta" "github.com/juicedata/juicefs/pkg/utils" @@ -49,17 +51,32 @@ func cmdLoad() *cli.Command { Usage: "encrypt algorithm (aes256gcm-rsa, chacha20-rsa)", Value: object.AES256GCM_RSA, }, + &cli.BoolFlag{ + Name: "binary", + Usage: "load metadata from a binary file (different from original JSON format)", + }, + &cli.BoolFlag{ + Name: "stat", + Usage: "show statistics of the metadata binary file", + }, + &cli.IntFlag{ + Name: "threads", + Value: 10, + Usage: "number of threads to load binary metadata, only works with --binary", + }, }, - Usage: "Load metadata from a previously dumped JSON file", + Usage: "Load metadata from a previously dumped file", ArgsUsage: "META-URL [FILE]", Description: ` -Load metadata into an empty metadata engine. +Load metadata into an empty metadata engine or show statistics of the backup file. WARNING: Do NOT use new engine and the old one at the same time, otherwise it will probably break consistency of the volume. 
Examples: $ juicefs load redis://localhost/1 meta-dump.json.gz +$ juicefs load redis://localhost/1 meta-dump.bin --binary --threads 10 +$ juicefs load meta-dump.bin --binary --stat Details: https://juicefs.com/docs/community/metadata_dump_load`, } @@ -67,6 +84,11 @@ Details: https://juicefs.com/docs/community/metadata_dump_load`, func load(ctx *cli.Context) error { setup(ctx, 1) + + if ctx.Bool("binary") && ctx.Bool("stat") { + return statBak(ctx) + } + metaUri := ctx.Args().Get(0) src := ctx.Args().Get(1) removePassword(metaUri) @@ -132,10 +154,32 @@ func load(ctx *cli.Context) error { } m := meta.NewClient(metaUri, nil) if format, err := m.Load(false); err == nil { - return fmt.Errorf("Database %s is used by volume %s", utils.RemovePassword(metaUri), format.Name) + return fmt.Errorf("database %s is used by volume %s", utils.RemovePassword(metaUri), format.Name) } - if err := m.LoadMeta(r); err != nil { - return err + + if ctx.Bool("binary") { + progress := utils.NewProgress(false) + bars := make(map[string]*utils.Bar) + for _, name := range meta.SegType2Name { + bars[name] = progress.AddCountSpinner(name) + } + + opt := &meta.LoadOption{ + Threads: ctx.Int("threads"), + Progress: func(name string, cnt int) { + bars[name].IncrBy(cnt) + }, + } + if err := m.LoadMetaV2(meta.WrapContext(ctx.Context), r, opt); err != nil { + // Finish the progress display on the failure path too, as dumpMeta does with its deferred Done. + progress.Done() + return err + } + progress.Done() + } else { + if err := m.LoadMeta(r); err != nil { + return err + } } if format, err := m.Load(true); err == nil { if format.SecretKey == "removed" { @@ -147,3 +191,38 @@ func load(ctx *cli.Context) error { logger.Infof("Load metadata from %s succeed", src) return nil } + +// statBak opens the file named by the first CLI argument, reads its backup footer and prints the version plus a Name/Num/Offset table. +func statBak(ctx *cli.Context) error { + path := ctx.Args().Get(0) + if path == "" { + return errors.New("missing file path") + } + + fp, err := os.Open(path) + if err != nil { + return fmt.Errorf("failed to open file %s: %w", path, err) + } + // The handle is only read from; release it on every return path, including a ReadFooter failure. + defer fp.Close() + bak := &meta.BakFormat{} + footer, err := bak.ReadFooter(fp) + if err != nil { + return
fmt.Errorf("failed to read footer: %w", err) + } + + fmt.Printf("Backup Version: %d\n", footer.Msg.Version) + data := make([][]string, 0, len(footer.Msg.Infos)) + for name, info := range footer.Msg.Infos { + data = append(data, []string{name, fmt.Sprintf("%d", info.Num), fmt.Sprintf("%+v", info.Offset)}) + } + + table := tablewriter.NewWriter(os.Stdout) + table.SetHeader([]string{"Name", "Num", "Offset in File"}) + + for _, v := range data { + table.Append(v) + } + table.Render() + return nil +} diff --git a/go.mod b/go.mod index fe5eaed6fd32..efab5e0e5e3b 100644 --- a/go.mod +++ b/go.mod @@ -46,6 +46,7 @@ require ( github.com/minio/minio v0.0.0-20210206053228-97fe57bba92c github.com/minio/minio-go/v7 v7.0.11-0.20210302210017-6ae69c73ce78 github.com/ncw/swift/v2 v2.0.1 + github.com/olekukonko/tablewriter v0.0.1 github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 github.com/pkg/errors v0.9.1 github.com/pkg/sftp v1.13.5 diff --git a/go.sum b/go.sum index fcd0f03628a3..fd13a6ff986f 100644 --- a/go.sum +++ b/go.sum @@ -668,6 +668,7 @@ github.com/nrdcg/namesilo v0.2.1/go.mod h1:lwMvfQTyYq+BbjJd30ylEG4GPSS6PII0Tia4r github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= +github.com/olekukonko/tablewriter v0.0.1 h1:b3iUnf1v+ppJiOfNX4yxxqfWKMQPZR5yoh8urCTFX88= github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/oliverisaac/shellescape v0.0.0-20220131224704-1b6c6b87b668 h1:WUilXdVrxYH+fFkmstviAOj1o9CfoW5O/Sd0LWPIVUA= github.com/oliverisaac/shellescape v0.0.0-20220131224704-1b6c6b87b668/go.mod h1:EDgl+cvbmeOQUMTTH94gjXVtFHr8xDe5BiXhWn7Hf1E= diff --git a/pkg/meta/backup.go b/pkg/meta/backup.go index 8b3416167ebd..d75642d612c5 100644 --- a/pkg/meta/backup.go +++ b/pkg/meta/backup.go @@ 
-55,6 +55,23 @@ const ( segTypeMax ) +var SegType2Name = map[int]string{ + segTypeFormat: "format", + segTypeCounter: "counter", + segTypeNode: "node", + segTypeEdge: "edge", + segTypeChunk: "chunk", + segTypeSliceRef: "sliceRef", + segTypeSymlink: "symlink", + segTypeSustained: "sustained", + segTypeDelFile: "delFile", + segTypeXattr: "xattr", + segTypeAcl: "acl", + segTypeStat: "stat", + segTypeQuota: "quota", + segTypeParent: "parent", +} + var errBakEOF = fmt.Errorf("reach backup EOF") func getMessageFromType(typ int) (proto.Message, error) { @@ -78,16 +95,16 @@ func createMessageByName(name protoreflect.FullName) (proto.Message, error) { return typ.New().Interface(), nil } -// bakFormat: BakSegment... + BakEOS + BakFooter -type bakFormat struct { - pos uint64 - footer *bakFooter +// BakFormat: BakSegment... + BakEOS + BakFooter +type BakFormat struct { + Pos uint64 + Footer *BakFooter } -func newBakFormat() *bakFormat { - return &bakFormat{ - footer: &bakFooter{ - msg: &pb.Footer{ +func newBakFormat() *BakFormat { + return &BakFormat{ + Footer: &BakFooter{ + Msg: &pb.Footer{ Magic: BakMagic, Version: BakVersion, Infos: make(map[string]*pb.Footer_SegInfo), @@ -96,7 +113,7 @@ func newBakFormat() *bakFormat { } } -func (f *bakFormat) writeSegment(w io.Writer, seg *bakSegment) error { +func (f *BakFormat) writeSegment(w io.Writer, seg *bakSegment) error { if seg == nil { return nil } @@ -107,19 +124,19 @@ func (f *bakFormat) writeSegment(w io.Writer, seg *bakSegment) error { } name := seg.String() - info, ok := f.footer.msg.Infos[name] + info, ok := f.Footer.Msg.Infos[name] if !ok { info = &pb.Footer_SegInfo{Offset: []uint64{}, Num: 0} - f.footer.msg.Infos[name] = info + f.Footer.Msg.Infos[name] = info } - info.Offset = append(info.Offset, f.pos) + info.Offset = append(info.Offset, f.Pos) info.Num += seg.num() - f.pos += uint64(n) + f.Pos += uint64(n) return nil } -func (f *bakFormat) readSegment(r io.Reader) (*bakSegment, error) { +func (f *BakFormat) 
readSegment(r io.Reader) (*bakSegment, error) { seg := &bakSegment{} if err := seg.Unmarshal(r); err != nil { return nil, err @@ -127,39 +144,39 @@ func (f *bakFormat) readSegment(r io.Reader) (*bakSegment, error) { return seg, nil } -func (f *bakFormat) writeFooter(w io.Writer) error { +func (f *BakFormat) writeFooter(w io.Writer) error { if err := f.writeEOS(w); err != nil { return err } - return f.footer.Marshal(w) + return f.Footer.Marshal(w) } -func (f *bakFormat) writeEOS(w io.Writer) error { +func (f *BakFormat) writeEOS(w io.Writer) error { if n, err := w.Write(binary.BigEndian.AppendUint32(nil, BakEOS)); err != nil && n != 4 { return fmt.Errorf("failed to write EOS: err %w, write len %d, expect len 4", err, n) } return nil } -func (f *bakFormat) readFooter(r io.ReadSeeker) (*bakFooter, error) { // nolint:unused - footer := &bakFooter{} +func (f *BakFormat) ReadFooter(r io.ReadSeeker) (*BakFooter, error) { // nolint:unused + footer := &BakFooter{} if err := footer.Unmarshal(r); err != nil { return nil, err } - if footer.msg.Magic != BakMagic { - return nil, fmt.Errorf("invalid magic number %d, expect %d", footer.msg.Magic, BakMagic) + if footer.Msg.Magic != BakMagic { + return nil, fmt.Errorf("invalid magic number %d, expect %d", footer.Msg.Magic, BakMagic) } - f.footer = footer + f.Footer = footer return footer, nil } -type bakFooter struct { - msg *pb.Footer - len uint64 +type BakFooter struct { + Msg *pb.Footer + Len uint64 } -func (h *bakFooter) Marshal(w io.Writer) error { - data, err := proto.Marshal(h.msg) +func (h *BakFooter) Marshal(w io.Writer) error { + data, err := proto.Marshal(h.Msg) if err != nil { return fmt.Errorf("failed to marshal footer: %w", err) } @@ -168,15 +185,15 @@ func (h *bakFooter) Marshal(w io.Writer) error { return fmt.Errorf("failed to write footer data: err %w, write len %d, expect len %d", err, n, len(data)) } - h.len = uint64(len(data)) - if n, err := w.Write(binary.BigEndian.AppendUint64(nil, h.len)); err != nil && n != 8 
{ + h.Len = uint64(len(data)) + if n, err := w.Write(binary.BigEndian.AppendUint64(nil, h.Len)); err != nil && n != 8 { return fmt.Errorf("failed to write footer length: err %w, write len %d, expect len 8", err, n) } return nil } -func (h *bakFooter) Unmarshal(r io.ReadSeeker) error { - lenSize := int64(unsafe.Sizeof(h.len)) +func (h *BakFooter) Unmarshal(r io.ReadSeeker) error { + lenSize := int64(unsafe.Sizeof(h.Len)) _, _ = r.Seek(-lenSize, io.SeekEnd) data := make([]byte, lenSize) @@ -184,15 +201,15 @@ func (h *bakFooter) Unmarshal(r io.ReadSeeker) error { return fmt.Errorf("failed to read footer length: err %w, read len %d, expect len %d", err, n, lenSize) } - h.len = binary.BigEndian.Uint64(data) - _, _ = r.Seek(-int64(h.len)-lenSize, io.SeekEnd) - data = make([]byte, h.len) - if n, err := r.Read(data); err != nil && n != int(h.len) { - return fmt.Errorf("failed to read footer: err %w, read len %d, expect len %d", err, n, h.len) + h.Len = binary.BigEndian.Uint64(data) + _, _ = r.Seek(-int64(h.Len)-lenSize, io.SeekEnd) + data = make([]byte, h.Len) + if n, err := r.Read(data); err != nil && n != int(h.Len) { + return fmt.Errorf("failed to read footer: err %w, read len %d, expect len %d", err, n, h.Len) } - h.msg = &pb.Footer{} - if err := proto.Unmarshal(data, h.msg); err != nil { + h.Msg = &pb.Footer{} + if err := proto.Unmarshal(data, h.Msg); err != nil { return fmt.Errorf("failed to unmarshal footer: %w", err) } return nil @@ -205,9 +222,51 @@ type bakSegment struct { } func (s *bakSegment) String() string { + if name, ok := SegType2Name[int(s.typ)]; ok { + return name + } return fmt.Sprintf("type-%d", s.typ) } +func newBakSegment(val proto.Message) *bakSegment { + s := &bakSegment{val: val} + switch v := s.val.(type) { + case *pb.Format: + s.typ = uint32(segTypeFormat) + case *pb.Batch: + if v.Counters != nil { + s.typ = uint32(segTypeCounter) + } else if v.Sustained != nil { + s.typ = uint32(segTypeSustained) + } else if v.Delfiles != nil { + s.typ = 
uint32(segTypeDelFile) + } else if v.Acls != nil { + s.typ = uint32(segTypeAcl) + } else if v.Xattrs != nil { + s.typ = uint32(segTypeXattr) + } else if v.Quotas != nil { + s.typ = uint32(segTypeQuota) + } else if v.Dirstats != nil { + s.typ = uint32(segTypeStat) + } else if v.Nodes != nil { + s.typ = uint32(segTypeNode) + } else if v.Chunks != nil { + s.typ = uint32(segTypeChunk) + } else if v.SliceRefs != nil { + s.typ = uint32(segTypeSliceRef) + } else if v.Edges != nil { + s.typ = uint32(segTypeEdge) + } else if v.Symlinks != nil { + s.typ = uint32(segTypeSymlink) + } else if v.Parents != nil { + s.typ = uint32(segTypeParent) + } else { + return nil + } + } + return s +} + func (s *bakSegment) num() uint64 { switch s.typ { case segTypeFormat: @@ -251,41 +310,6 @@ func (s *bakSegment) Marshal(w io.Writer) (int, error) { return 0, fmt.Errorf("segment %s is nil", s) } - switch v := s.val.(type) { - case *pb.Format: - s.typ = uint32(segTypeFormat) - case *pb.Batch: - if v.Counters != nil { - s.typ = uint32(segTypeCounter) - } else if v.Sustained != nil { - s.typ = uint32(segTypeSustained) - } else if v.Delfiles != nil { - s.typ = uint32(segTypeDelFile) - } else if v.Acls != nil { - s.typ = uint32(segTypeAcl) - } else if v.Xattrs != nil { - s.typ = uint32(segTypeXattr) - } else if v.Quotas != nil { - s.typ = uint32(segTypeQuota) - } else if v.Dirstats != nil { - s.typ = uint32(segTypeStat) - } else if v.Nodes != nil { - s.typ = uint32(segTypeNode) - } else if v.Chunks != nil { - s.typ = uint32(segTypeChunk) - } else if v.SliceRefs != nil { - s.typ = uint32(segTypeSliceRef) - } else if v.Edges != nil { - s.typ = uint32(segTypeEdge) - } else if v.Symlinks != nil { - s.typ = uint32(segTypeSymlink) - } else if v.Parents != nil { - s.typ = uint32(segTypeParent) - } else { - return 0, fmt.Errorf("unknown batch type %s", s) - } - } - if err := binary.Write(w, binary.BigEndian, s.typ); err != nil { return 0, fmt.Errorf("failed to write segment type %s : %w", s, err) } @@ 
-337,6 +361,8 @@ func (s *bakSegment) Unmarshal(r io.Reader) error { type DumpOption struct { KeepSecret bool Threads int + // Progress, when non-nil, receives a segment name and a count for each segment processed. + Progress func(name string, cnt int) } func (opt *DumpOption) check() *DumpOption { @@ -377,7 +403,9 @@ func dumpResult(ctx context.Context, ch chan<- *dumpedResult, res *dumpedResult) } type LoadOption struct { - Threads int + Threads int + // Progress, when non-nil, receives a segment name and a count for each segment processed. + Progress func(name string, cnt int) } func (opt *LoadOption) check() { diff --git a/pkg/meta/base.go b/pkg/meta/base.go index 2d04cef2619b..fdebf03858cb 100644 --- a/pkg/meta/base.go +++ b/pkg/meta/base.go @@ -3118,13 +3118,17 @@ func (m *baseMeta) DumpMetaV2(ctx Context, w io.Writer, opt *DumpOption) error { if res == nil { break } - seg := &bakSegment{val: res.msg} + seg := newBakSegment(res.msg) if err := bak.writeSegment(w, seg); err != nil { logger.Errorf("write %d err: %v", seg.typ, err) ctx.Cancel() wg.Wait() return err } + // newBakSegment returns nil for a batch with no populated field and writeSegment skips it; skip the callback too instead of dereferencing nil. + if seg != nil && opt.Progress != nil { + opt.Progress(seg.String(), int(seg.num())) + } if res.release != nil { res.release(res.msg) } @@ -3176,7 +3180,7 @@ func (m *baseMeta) LoadMetaV2(ctx Context, r io.Reader, opt *LoadOption) error { go workerFunc(ctx, taskCh) } - bak := &bakFormat{} + bak := &BakFormat{} for { seg, err := bak.readSegment(r) if err != nil { @@ -3194,6 +3198,9 @@ func (m *baseMeta) LoadMetaV2(ctx Context, r io.Reader, opt *LoadOption) error { wg.Wait() return ctx.Err() case taskCh <- &task{int(seg.typ), seg.val}: + if opt.Progress != nil { + opt.Progress(seg.String(), int(seg.num())) + } } } wg.Wait() diff --git a/pkg/meta/interface.go b/pkg/meta/interface.go index 68ec02b23bd1..8ee07993a315 100644 --- a/pkg/meta/interface.go +++ b/pkg/meta/interface.go @@ -455,7 +455,7 @@ type Meta interface { DumpMeta(w io.Writer, root Ino, threads int, keepSecret, fast, skipTrash bool) error LoadMeta(r io.Reader) error - DumpMetaV2(ctx Context, w io.Writer, opt *DumpOption) (err error) + DumpMetaV2(ctx Context, w io.Writer, opt *DumpOption) error LoadMetaV2(ctx Context, r io.Reader, opt
*LoadOption) error // getBase return the base engine. diff --git a/pkg/meta/tkv_bak.go b/pkg/meta/tkv_bak.go index 65f50165d932..5d4d044a69d6 100644 --- a/pkg/meta/tkv_bak.go +++ b/pkg/meta/tkv_bak.go @@ -684,7 +684,7 @@ func (m *kvMeta) LoadMetaV2(ctx Context, r io.Reader, opt *LoadOption) error { wg.Add(1) go workerFunc(ctx, taskCh) - bak := &bakFormat{} + bak := &BakFormat{} for { seg, err := bak.readSegment(r) if err != nil {