From acb9e23163e79097d4644fba251d67315b0ed790 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Wed, 4 Jul 2018 23:12:12 -0300 Subject: [PATCH 1/2] pbdagreader: use FSNode instead of protobuf structure Focus on the UnixFS layer and avoid explicit references to protocol buffers format (used to serialize objects of that layer). Use the `unixfs.FSNode` structure which abstracts away the `unixfs.pb.Data` format. Replace `PBDagReader` field `ftpb.Data` with `ft.FSNode`, renaming it to `file` (which is the type of UnixFS object represented in the reader) and changing its comment, removing the "cached" reference, as this structure is not used here as a cache (`PBDagReader` doesn't modify the DAG, it's read-only). Also, removed unused `ProtoNode` field to avoid confusion, as it would normally be present if the `FSNode` was in fact used as a cache of the contents of the `ProtoNode`. An example of the advantage of shifting the focus from the format to the UnixFS layer is dropping the use of `len(pb.Blocksizes)` in favor of the clearer `NumChildren()` abstraction. Added `BlockSize()` accessor. 
License: MIT Signed-off-by: Lucas Molas --- unixfs/archive/tar/writer.go | 17 +++++++------- unixfs/io/dagreader.go | 9 ++++---- unixfs/io/pbdagreader.go | 45 +++++++++++++++--------------------- unixfs/unixfs.go | 6 +++++ 4 files changed, 36 insertions(+), 41 deletions(-) diff --git a/unixfs/archive/tar/writer.go b/unixfs/archive/tar/writer.go index 61ceb2e9824..04eda65d490 100644 --- a/unixfs/archive/tar/writer.go +++ b/unixfs/archive/tar/writer.go @@ -16,7 +16,6 @@ import ( upb "github.com/ipfs/go-ipfs/unixfs/pb" ipld "gx/ipfs/QmWi2BYBL5gJ3CiAiQchg6rn1A8iBsrWy51EYxvHVjFvLb/go-ipld-format" - proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto" ) // Writer is a utility structure that helps to write @@ -57,12 +56,12 @@ func (w *Writer) writeDir(nd *mdag.ProtoNode, fpath string) error { }) } -func (w *Writer) writeFile(nd *mdag.ProtoNode, pb *upb.Data, fpath string) error { - if err := writeFileHeader(w.TarW, fpath, pb.GetFilesize()); err != nil { +func (w *Writer) writeFile(nd *mdag.ProtoNode, fsNode *ft.FSNode, fpath string) error { + if err := writeFileHeader(w.TarW, fpath, fsNode.FileSize()); err != nil { return err } - dagr := uio.NewPBFileReader(w.ctx, nd, pb, w.Dag) + dagr := uio.NewPBFileReader(w.ctx, nd, fsNode, w.Dag) if _, err := dagr.WriteTo(w.TarW); err != nil { return err } @@ -74,12 +73,12 @@ func (w *Writer) writeFile(nd *mdag.ProtoNode, pb *upb.Data, fpath string) error func (w *Writer) WriteNode(nd ipld.Node, fpath string) error { switch nd := nd.(type) { case *mdag.ProtoNode: - pb := new(upb.Data) - if err := proto.Unmarshal(nd.Data(), pb); err != nil { + fsNode, err := ft.FSNodeFromBytes(nd.Data()) + if err != nil { return err } - switch pb.GetType() { + switch fsNode.GetType() { case upb.Data_Metadata: fallthrough case upb.Data_Directory, upb.Data_HAMTShard: @@ -87,9 +86,9 @@ func (w *Writer) WriteNode(nd ipld.Node, fpath string) error { case upb.Data_Raw: fallthrough case upb.Data_File: - return w.writeFile(nd, pb, 
fpath) + return w.writeFile(nd, fsNode, fpath) case upb.Data_Symlink: - return writeSymlinkHeader(w.TarW, string(pb.GetData()), fpath) + return writeSymlinkHeader(w.TarW, string(fsNode.GetData()), fpath) default: return ft.ErrUnrecognizedType } diff --git a/unixfs/io/dagreader.go b/unixfs/io/dagreader.go index e3f79573213..1b4c48571a3 100644 --- a/unixfs/io/dagreader.go +++ b/unixfs/io/dagreader.go @@ -11,7 +11,6 @@ import ( ftpb "github.com/ipfs/go-ipfs/unixfs/pb" ipld "gx/ipfs/QmWi2BYBL5gJ3CiAiQchg6rn1A8iBsrWy51EYxvHVjFvLb/go-ipld-format" - proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto" ) // Common errors @@ -45,17 +44,17 @@ func NewDagReader(ctx context.Context, n ipld.Node, serv ipld.NodeGetter) (DagRe case *mdag.RawNode: return NewBufDagReader(n.RawData()), nil case *mdag.ProtoNode: - pb := new(ftpb.Data) - if err := proto.Unmarshal(n.Data(), pb); err != nil { + fsNode, err := ft.FSNodeFromBytes(n.Data()) + if err != nil { return nil, err } - switch pb.GetType() { + switch fsNode.GetType() { case ftpb.Data_Directory, ftpb.Data_HAMTShard: // Dont allow reading directories return nil, ErrIsDir case ftpb.Data_File, ftpb.Data_Raw: - return NewPBFileReader(ctx, n, pb, serv), nil + return NewPBFileReader(ctx, n, fsNode, serv), nil case ftpb.Data_Metadata: if len(n.Links()) == 0 { return nil, errors.New("incorrectly formatted metadata object") diff --git a/unixfs/io/pbdagreader.go b/unixfs/io/pbdagreader.go index ce53d67118d..84cef04dcc3 100644 --- a/unixfs/io/pbdagreader.go +++ b/unixfs/io/pbdagreader.go @@ -11,7 +11,6 @@ import ( ftpb "github.com/ipfs/go-ipfs/unixfs/pb" ipld "gx/ipfs/QmWi2BYBL5gJ3CiAiQchg6rn1A8iBsrWy51EYxvHVjFvLb/go-ipld-format" - proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto" cid "gx/ipfs/QmapdYm1b22Frv3k17fqrBYTFRxwiaVJkB299Mfn33edeB/go-cid" ) @@ -19,11 +18,8 @@ import ( type PBDagReader struct { serv ipld.NodeGetter - // the node being read - node *mdag.ProtoNode - - // 
cached protobuf structure from node.Data - pbdata *ftpb.Data + // UnixFS file (it should be of type `Data_File` or `Data_Raw` only). + file *ft.FSNode // the current data buffer to be read from // will either be a bytes.Reader or a child DagReader @@ -51,18 +47,17 @@ type PBDagReader struct { var _ DagReader = (*PBDagReader)(nil) // NewPBFileReader constructs a new PBFileReader. -func NewPBFileReader(ctx context.Context, n *mdag.ProtoNode, pb *ftpb.Data, serv ipld.NodeGetter) *PBDagReader { +func NewPBFileReader(ctx context.Context, n *mdag.ProtoNode, file *ft.FSNode, serv ipld.NodeGetter) *PBDagReader { fctx, cancel := context.WithCancel(ctx) curLinks := getLinkCids(n) return &PBDagReader{ - node: n, serv: serv, - buf: NewBufDagReader(pb.GetData()), + buf: NewBufDagReader(file.GetData()), promises: make([]*ipld.NodePromise, len(curLinks)), links: curLinks, ctx: fctx, cancel: cancel, - pbdata: pb, + file: file, } } @@ -105,21 +100,20 @@ func (dr *PBDagReader) precalcNextBuf(ctx context.Context) error { switch nxt := nxt.(type) { case *mdag.ProtoNode: - pb := new(ftpb.Data) - err = proto.Unmarshal(nxt.Data(), pb) + fsNode, err := ft.FSNodeFromBytes(nxt.Data()) if err != nil { return fmt.Errorf("incorrectly formatted protobuf: %s", err) } - switch pb.GetType() { + switch fsNode.GetType() { case ftpb.Data_Directory, ftpb.Data_HAMTShard: // A directory should not exist within a file return ft.ErrInvalidDirLocation case ftpb.Data_File: - dr.buf = NewPBFileReader(dr.ctx, nxt, pb, dr.serv) + dr.buf = NewPBFileReader(dr.ctx, nxt, fsNode, dr.serv) return nil case ftpb.Data_Raw: - dr.buf = NewBufDagReader(pb.GetData()) + dr.buf = NewBufDagReader(fsNode.GetData()) return nil case ftpb.Data_Metadata: return errors.New("shouldnt have had metadata object inside file") @@ -146,7 +140,7 @@ func getLinkCids(n ipld.Node) []*cid.Cid { // Size return the total length of the data from the DAG structured file. 
func (dr *PBDagReader) Size() uint64 { - return dr.pbdata.GetFilesize() + return dr.file.FileSize() } // Read reads data from the DAG structured file @@ -244,17 +238,14 @@ func (dr *PBDagReader) Seek(offset int64, whence int) (int64, error) { return offset, nil } - // Grab cached protobuf object (solely to make code look cleaner) - pb := dr.pbdata - // left represents the number of bytes remaining to seek to (from beginning) left := offset - if int64(len(pb.Data)) >= offset { + if int64(len(dr.file.GetData())) >= offset { // Close current buf to close potential child dagreader if dr.buf != nil { dr.buf.Close() } - dr.buf = NewBufDagReader(pb.GetData()[offset:]) + dr.buf = NewBufDagReader(dr.file.GetData()[offset:]) // start reading links from the beginning dr.linkPosition = 0 @@ -263,15 +254,15 @@ func (dr *PBDagReader) Seek(offset int64, whence int) (int64, error) { } // skip past root block data - left -= int64(len(pb.Data)) + left -= int64(len(dr.file.GetData())) // iterate through links and find where we need to be - for i := 0; i < len(pb.Blocksizes); i++ { - if pb.Blocksizes[i] > uint64(left) { + for i := 0; i < dr.file.NumChildren(); i++ { + if dr.file.BlockSize(i) > uint64(left) { dr.linkPosition = i break } else { - left -= int64(pb.Blocksizes[i]) + left -= int64(dr.file.BlockSize(i)) } } @@ -303,14 +294,14 @@ func (dr *PBDagReader) Seek(offset int64, whence int) (int64, error) { noffset := dr.offset + offset return dr.Seek(noffset, io.SeekStart) case io.SeekEnd: - noffset := int64(dr.pbdata.GetFilesize()) - offset + noffset := int64(dr.file.FileSize()) - offset n, err := dr.Seek(noffset, io.SeekStart) // Return negative number if we can't figure out the file size. Using io.EOF // for this seems to be good(-enough) solution as it's only returned by // precalcNextBuf when we step out of file range. 
// This is needed for gateway to function properly - if err == io.EOF && *dr.pbdata.Type == ftpb.Data_File { + if err == io.EOF && dr.file.GetType() == ftpb.Data_File { return -1, nil } return n, err diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go index 3ba01fb0fc4..f08da941519 100644 --- a/unixfs/unixfs.go +++ b/unixfs/unixfs.go @@ -195,6 +195,12 @@ func (n *FSNode) RemoveBlockSize(i int) { n.format.Blocksizes = append(n.format.Blocksizes[:i], n.format.Blocksizes[i+1:]...) } +// BlockSize returns the block size indexed by `i`. +// TODO: Evaluate if this function should be bounds checking. +func (n *FSNode) BlockSize(i int) uint64 { + return n.format.Blocksizes[i] +} + // GetBytes marshals this node as a protobuf message. func (n *FSNode) GetBytes() ([]byte, error) { return proto.Marshal(&n.format) From 18361d38496228a40a72226ea4213cb0bf01bde3 Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Fri, 6 Jul 2018 13:22:19 -0300 Subject: [PATCH 2/2] unixfs: remove `Get` prefix from `FSNode` accessors See https://golang.org/doc/effective_go.html#Getters. 
License: MIT Signed-off-by: Lucas Molas --- importer/helpers/helpers.go | 2 +- mfs/file.go | 2 +- mfs/mfs_test.go | 2 +- unixfs/archive/tar/writer.go | 4 ++-- unixfs/io/dagreader.go | 2 +- unixfs/io/pbdagreader.go | 14 +++++++------- unixfs/unixfs.go | 10 +++++----- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/importer/helpers/helpers.go b/importer/helpers/helpers.go index 64a22f5510a..5aa67b544c0 100644 --- a/importer/helpers/helpers.go +++ b/importer/helpers/helpers.go @@ -77,7 +77,7 @@ func (n *UnixfsNode) Set(other *UnixfsNode) { n.raw = other.raw n.rawnode = other.rawnode if other.ufmt != nil { - n.ufmt.SetData(other.ufmt.GetData()) + n.ufmt.SetData(other.ufmt.Data()) } } diff --git a/mfs/file.go b/mfs/file.go index f28cf79f834..40106d1daf1 100644 --- a/mfs/file.go +++ b/mfs/file.go @@ -60,7 +60,7 @@ func (fi *File) Open(flags int, sync bool) (FileDescriptor, error) { return nil, err } - switch fsn.GetType() { + switch fsn.Type() { default: return nil, fmt.Errorf("unsupported fsnode type for 'file'") case ft.TSymlink: diff --git a/mfs/mfs_test.go b/mfs/mfs_test.go index 7088c8c9cc4..1d9aee46f84 100644 --- a/mfs/mfs_test.go +++ b/mfs/mfs_test.go @@ -852,7 +852,7 @@ func TestFlushing(t *testing.T) { t.Fatal(err) } - if fsnode.GetType() != ft.TDirectory { + if fsnode.Type() != ft.TDirectory { t.Fatal("root wasnt a directory") } diff --git a/unixfs/archive/tar/writer.go b/unixfs/archive/tar/writer.go index 04eda65d490..0c2df9fb1e4 100644 --- a/unixfs/archive/tar/writer.go +++ b/unixfs/archive/tar/writer.go @@ -78,7 +78,7 @@ func (w *Writer) WriteNode(nd ipld.Node, fpath string) error { return err } - switch fsNode.GetType() { + switch fsNode.Type() { case upb.Data_Metadata: fallthrough case upb.Data_Directory, upb.Data_HAMTShard: @@ -88,7 +88,7 @@ func (w *Writer) WriteNode(nd ipld.Node, fpath string) error { case upb.Data_File: return w.writeFile(nd, fsNode, fpath) case upb.Data_Symlink: - return writeSymlinkHeader(w.TarW, 
string(fsNode.GetData()), fpath) + return writeSymlinkHeader(w.TarW, string(fsNode.Data()), fpath) default: return ft.ErrUnrecognizedType } diff --git a/unixfs/io/dagreader.go b/unixfs/io/dagreader.go index 1b4c48571a3..504c0b6f094 100644 --- a/unixfs/io/dagreader.go +++ b/unixfs/io/dagreader.go @@ -49,7 +49,7 @@ func NewDagReader(ctx context.Context, n ipld.Node, serv ipld.NodeGetter) (DagRe return nil, err } - switch fsNode.GetType() { + switch fsNode.Type() { case ftpb.Data_Directory, ftpb.Data_HAMTShard: // Dont allow reading directories return nil, ErrIsDir diff --git a/unixfs/io/pbdagreader.go b/unixfs/io/pbdagreader.go index 84cef04dcc3..c0aa90922cb 100644 --- a/unixfs/io/pbdagreader.go +++ b/unixfs/io/pbdagreader.go @@ -52,7 +52,7 @@ func NewPBFileReader(ctx context.Context, n *mdag.ProtoNode, file *ft.FSNode, se curLinks := getLinkCids(n) return &PBDagReader{ serv: serv, - buf: NewBufDagReader(file.GetData()), + buf: NewBufDagReader(file.Data()), promises: make([]*ipld.NodePromise, len(curLinks)), links: curLinks, ctx: fctx, @@ -105,7 +105,7 @@ func (dr *PBDagReader) precalcNextBuf(ctx context.Context) error { return fmt.Errorf("incorrectly formatted protobuf: %s", err) } - switch fsNode.GetType() { + switch fsNode.Type() { case ftpb.Data_Directory, ftpb.Data_HAMTShard: // A directory should not exist within a file return ft.ErrInvalidDirLocation @@ -113,7 +113,7 @@ func (dr *PBDagReader) precalcNextBuf(ctx context.Context) error { dr.buf = NewPBFileReader(dr.ctx, nxt, fsNode, dr.serv) return nil case ftpb.Data_Raw: - dr.buf = NewBufDagReader(fsNode.GetData()) + dr.buf = NewBufDagReader(fsNode.Data()) return nil case ftpb.Data_Metadata: return errors.New("shouldnt have had metadata object inside file") @@ -240,12 +240,12 @@ func (dr *PBDagReader) Seek(offset int64, whence int) (int64, error) { // left represents the number of bytes remaining to seek to (from beginning) left := offset - if int64(len(dr.file.GetData())) >= offset { + if 
int64(len(dr.file.Data())) >= offset { // Close current buf to close potential child dagreader if dr.buf != nil { dr.buf.Close() } - dr.buf = NewBufDagReader(dr.file.GetData()[offset:]) + dr.buf = NewBufDagReader(dr.file.Data()[offset:]) // start reading links from the beginning dr.linkPosition = 0 @@ -254,7 +254,7 @@ func (dr *PBDagReader) Seek(offset int64, whence int) (int64, error) { } // skip past root block data - left -= int64(len(dr.file.GetData())) + left -= int64(len(dr.file.Data())) // iterate through links and find where we need to be for i := 0; i < dr.file.NumChildren(); i++ { @@ -301,7 +301,7 @@ func (dr *PBDagReader) Seek(offset int64, whence int) (int64, error) { // for this seems to be good(-enough) solution as it's only returned by // precalcNextBuf when we step out of file range. // This is needed for gateway to function properly - if err == io.EOF && dr.file.GetType() == ftpb.Data_File { + if err == io.EOF && dr.file.Type() == ftpb.Data_File { return -1, nil } return n, err diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go index f08da941519..9cd9731ed9d 100644 --- a/unixfs/unixfs.go +++ b/unixfs/unixfs.go @@ -217,15 +217,15 @@ func (n *FSNode) NumChildren() int { return len(n.format.Blocksizes) } -// GetData retrieves the `Data` field from the internal `format`. -func (n *FSNode) GetData() []byte { +// Data retrieves the `Data` field from the internal `format`. +func (n *FSNode) Data() []byte { return n.format.GetData() } // SetData sets the `Data` field from the internal `format` // updating its `Filesize`. func (n *FSNode) SetData(newData []byte) { - n.UpdateFilesize(int64(len(newData) - len(n.GetData()))) + n.UpdateFilesize(int64(len(newData) - len(n.Data()))) n.format.Data = newData } @@ -237,8 +237,8 @@ func (n *FSNode) UpdateFilesize(filesizeDiff int64) { int64(n.format.GetFilesize()) + filesizeDiff)) } -// GetType retrieves the `Type` field from the internal `format`. 
-func (n *FSNode) GetType() pb.Data_DataType { +// Type retrieves the `Type` field from the internal `format`. +func (n *FSNode) Type() pb.Data_DataType { return n.format.GetType() }