diff --git a/cid-fmt/main.go b/cid-fmt/main.go index 7dacb8a..ac75098 100644 --- a/cid-fmt/main.go +++ b/cid-fmt/main.go @@ -20,7 +20,7 @@ func usage() { const fmtRef = ` %% literal % - %b multibase name + %b multibase name %B multibase code %v version string %V version number @@ -46,7 +46,7 @@ func main() { usage() } newBase := mb.Encoding(-1) - var verConv func(cid *c.Cid) (*c.Cid, error) + var verConv func(cid c.Cid) (c.Cid, error) args := os.Args[1:] outer: for { @@ -132,15 +132,15 @@ func errorMsg(fmtStr string, a ...interface{}) { exitCode = 1 } -func decode(v string) (mb.Encoding, *c.Cid, error) { +func decode(v string) (mb.Encoding, c.Cid, error) { if len(v) < 2 { - return 0, nil, c.ErrCidTooShort + return 0, c.EmptyCid, c.ErrCidTooShort } if len(v) == 46 && v[:2] == "Qm" { hash, err := mh.FromB58String(v) if err != nil { - return 0, nil, err + return 0, c.EmptyCid, err } return mb.Base58BTC, c.NewCidV0(hash), nil @@ -148,7 +148,7 @@ func decode(v string) (mb.Encoding, *c.Cid, error) { base, data, err := mb.Decode(v) if err != nil { - return 0, nil, err + return 0, c.EmptyCid, err } cid, err := c.Cast(data) @@ -158,7 +158,7 @@ func decode(v string) (mb.Encoding, *c.Cid, error) { const ERR_STR = "!ERROR!" -func fmtCid(fmtStr string, base mb.Encoding, cid *c.Cid) (string, error) { +func fmtCid(fmtStr string, base mb.Encoding, cid c.Cid) (string, error) { p := cid.Prefix() out := new(bytes.Buffer) var err error @@ -265,13 +265,13 @@ func encode(base mb.Encoding, data []byte, strip bool) string { return str } -func toCidV0(cid *c.Cid) (*c.Cid, error) { +func toCidV0(cid c.Cid) (c.Cid, error) { if cid.Type() != c.DagProtobuf { - return nil, fmt.Errorf("can't convert non-protobuf nodes to cidv0") + return c.EmptyCid, fmt.Errorf("can't convert non-protobuf nodes to cidv0") } return c.NewCidV0(cid.Hash()), nil } -func toCidV1(cid *c.Cid) (*c.Cid, error) { +func toCidV1(cid c.Cid) (c.Cid, error) { return c.NewCidV1(cid.Type(), cid.Hash()), nil } diff --git a/cid.go b/cid.go index 99a26a6..00fc563 100644 --- a/cid.go +++ b/cid.go @@ -126,22 +126,28 @@ var CodecToStr = map[uint64]string{ // They exist to allow IPFS to work with Cids while keeping // compatibility with the plain-multihash format used used in IPFS. // NewCidV1 should be used preferentially. -func NewCidV0(mhash mh.Multihash) *Cid { - return &Cid{ - version: 0, - codec: DagProtobuf, - hash: mhash, - } +func NewCidV0(mhash mh.Multihash) Cid { + return newCid(0, DagProtobuf, mhash) } // NewCidV1 returns a new Cid using the given multicodec-packed // content type. -func NewCidV1(codecType uint64, mhash mh.Multihash) *Cid { - return &Cid{ - version: 1, - codec: codecType, - hash: mhash, +func NewCidV1(codecType uint64, mhash mh.Multihash) Cid { + return newCid(1, codecType, mhash) +} + +func newCid(version, codecType uint64, mhash mh.Multihash) Cid { + hashlen := len(mhash) + // two 8 bytes (max) numbers plus hash + buf := make([]byte, 2*binary.MaxVarintLen64+hashlen) + n := binary.PutUvarint(buf, version) + n += binary.PutUvarint(buf[n:], codecType) + cn := copy(buf[n:], mhash) + if cn != hashlen { + panic("copy hash length is inconsistent") } + + return Cid(buf[:n+hashlen]) } // NewPrefixV0 returns a CIDv0 prefix with the specified multihash type. @@ -168,15 +174,29 @@ func NewPrefixV1(codecType uint64, mhType uint64) Prefix { // Cid represents a self-describing content adressed // identifier. It is formed by a Version, a Codec (which indicates // a multicodec-packed content type) and a Multihash. -type Cid struct { - version uint64 - codec uint64 - hash mh.Multihash +// Byte layout: [version, codec, multihash] +// - version uvarint +// - codec uvarint +// - hash mh.Multihash +type Cid string + +var EmptyCid = Cid(string([]byte{})) + +func (c Cid) version() uint64 { + v, _ := binary.Uvarint([]byte(c)) + return v +} + +func (c Cid) codec() uint64 { + bytes := []byte(c) + _, n := binary.Uvarint(bytes) + codec, _ := binary.Uvarint(bytes[n:]) + return codec } // Parse is a short-hand function to perform Decode, Cast etc... on // a generic interface{} type. -func Parse(v interface{}) (*Cid, error) { +func Parse(v interface{}) (Cid, error) { switch v2 := v.(type) { case string: if strings.Contains(v2, "/ipfs/") { @@ -187,10 +207,10 @@ func Parse(v interface{}) (*Cid, error) { return Cast(v2) case mh.Multihash: return NewCidV0(v2), nil - case *Cid: + case Cid: return v2, nil default: - return nil, fmt.Errorf("can't parse %+v as Cid", v2) + return EmptyCid, fmt.Errorf("can't parse %+v as Cid", v2) } } @@ -206,15 +226,15 @@ func Parse(v interface{}) (*Cid, error) { // Decode will also detect and parse CidV0 strings. Strings // starting with "Qm" are considered CidV0 and treated directly // as B58-encoded multihashes. -func Decode(v string) (*Cid, error) { +func Decode(v string) (Cid, error) { if len(v) < 2 { - return nil, ErrCidTooShort + return EmptyCid, ErrCidTooShort } if len(v) == 46 && v[:2] == "Qm" { hash, err := mh.FromB58String(v) if err != nil { - return nil, err + return EmptyCid, err } return NewCidV0(hash), nil @@ -222,7 +242,7 @@ func Decode(v string) (*Cid, error) { _, data, err := mbase.Decode(v) if err != nil { - return nil, err + return EmptyCid, err } return Cast(data) @@ -250,59 +270,51 @@ func uvError(read int) error { // // Please use decode when parsing a regular Cid string, as Cast does not // expect multibase-encoded data. Cast accepts the output of Cid.Bytes(). -func Cast(data []byte) (*Cid, error) { +func Cast(data []byte) (Cid, error) { if len(data) == 34 && data[0] == 18 && data[1] == 32 { h, err := mh.Cast(data) if err != nil { - return nil, err + return EmptyCid, err } - return &Cid{ - codec: DagProtobuf, - version: 0, - hash: h, - }, nil + return NewCidV0(h), nil } vers, n := binary.Uvarint(data) if err := uvError(n); err != nil { - return nil, err + return EmptyCid, err } if vers != 0 && vers != 1 { - return nil, fmt.Errorf("invalid cid version number: %d", vers) + return EmptyCid, fmt.Errorf("invalid cid version number: %d", vers) } - codec, cn := binary.Uvarint(data[n:]) + _, cn := binary.Uvarint(data[n:]) if err := uvError(cn); err != nil { - return nil, err + return EmptyCid, err } rest := data[n+cn:] h, err := mh.Cast(rest) if err != nil { - return nil, err + return EmptyCid, err } - return &Cid{ - version: vers, - codec: codec, - hash: h, - }, nil + return Cid(data[0 : n+cn+len(h)]), nil } // Type returns the multicodec-packed content type of a Cid. -func (c *Cid) Type() uint64 { - return c.codec +func (c Cid) Type() uint64 { + return c.codec() } // String returns the default string representation of a // Cid. Currently, Base58 is used as the encoding for the // multibase string. -func (c *Cid) String() string { - switch c.version { +func (c Cid) String() string { + switch c.version() { case 0: - return c.hash.B58String() + return c.Hash().B58String() case 1: mbstr, err := mbase.Encode(mbase.Base58BTC, c.bytesV1()) if err != nil { @@ -317,13 +329,13 @@ func (c *Cid) String() string { // String returns the string representation of a Cid // encoded is selected base -func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) { - switch c.version { +func (c Cid) StringOfBase(base mbase.Encoding) (string, error) { + switch c.version() { case 0: if base != mbase.Base58BTC { return "", ErrInvalidEncoding } - return c.hash.B58String(), nil + return c.Hash().B58String(), nil case 1: return mbase.Encode(base, c.bytesV1()) default: @@ -332,15 +344,21 @@ func (c *Cid) StringOfBase(base mbase.Encoding) (string, error) { } // Hash returns the multihash contained by a Cid. -func (c *Cid) Hash() mh.Multihash { - return c.hash +func (c Cid) Hash() mh.Multihash { + bytes := []byte(c) + // skip version length + _, n1 := binary.Uvarint(bytes) + // skip codec length + _, n2 := binary.Uvarint(bytes[n1:]) + + return mh.Multihash(bytes[n1+n2:]) } // Bytes returns the byte representation of a Cid. // The output of bytes can be parsed back into a Cid // with Cast(). -func (c *Cid) Bytes() []byte { - switch c.version { +func (c Cid) Bytes() []byte { + switch c.version() { case 0: return c.bytesV0() case 1: @@ -350,30 +368,20 @@ func (c *Cid) Bytes() []byte { } } -func (c *Cid) bytesV0() []byte { - return []byte(c.hash) +func (c Cid) bytesV0() []byte { + return []byte(c.Hash()) } -func (c *Cid) bytesV1() []byte { - // two 8 bytes (max) numbers plus hash - buf := make([]byte, 2*binary.MaxVarintLen64+len(c.hash)) - n := binary.PutUvarint(buf, c.version) - n += binary.PutUvarint(buf[n:], c.codec) - cn := copy(buf[n:], c.hash) - if cn != len(c.hash) { - panic("copy hash length is inconsistent") - } - - return buf[:n+len(c.hash)] +func (c Cid) bytesV1() []byte { + return []byte(c) } // Equals checks that two Cids are the same. // In order for two Cids to be considered equal, the // Version, the Codec and the Multihash must match. -func (c *Cid) Equals(o *Cid) bool { - return c.codec == o.codec && - c.version == o.version && - bytes.Equal(c.hash, o.hash) +func (c Cid) Equals(o Cid) bool { + // TODO: can we use regular string equality? + return bytes.Equal([]byte(c), []byte(o)) } // UnmarshalJSON parses the JSON representation of a Cid. @@ -398,9 +406,8 @@ func (c *Cid) UnmarshalJSON(b []byte) error { return err } - c.version = out.version - c.hash = out.hash - c.codec = out.codec + *c = out[:] + return nil } @@ -410,31 +417,31 @@ func (c *Cid) UnmarshalJSON(b []byte) error { // // Note that this formatting comes from the IPLD specification // (https://github.com/ipld/specs/tree/master/ipld) -func (c *Cid) MarshalJSON() ([]byte, error) { +func (c Cid) MarshalJSON() ([]byte, error) { return []byte(fmt.Sprintf("{\"/\":\"%s\"}", c.String())), nil } // KeyString casts the result of cid.Bytes() as a string, and returns it. -func (c *Cid) KeyString() string { - return string(c.Bytes()) +func (c Cid) KeyString() string { + return string(c) } // Loggable returns a Loggable (as defined by // https://godoc.org/github.com/ipfs/go-log). -func (c *Cid) Loggable() map[string]interface{} { +func (c Cid) Loggable() map[string]interface{} { return map[string]interface{}{ "cid": c, } } // Prefix builds and returns a Prefix out of a Cid. -func (c *Cid) Prefix() Prefix { - dec, _ := mh.Decode(c.hash) // assuming we got a valid multiaddr, this will not error +func (c Cid) Prefix() Prefix { + dec, _ := mh.Decode(c.Hash()) // assuming we got a valid multiaddr, this will not error return Prefix{ MhType: dec.Code, MhLength: dec.Length, - Version: c.version, - Codec: c.codec, + Version: c.version(), + Codec: c.codec(), } } @@ -451,10 +458,10 @@ type Prefix struct { // Sum uses the information in a prefix to perform a multihash.Sum() // and return a newly constructed Cid with the resulting multihash. -func (p Prefix) Sum(data []byte) (*Cid, error) { +func (p Prefix) Sum(data []byte) (Cid, error) { hash, err := mh.Sum(data, p.MhType, p.MhLength) if err != nil { - return nil, err + return EmptyCid, err } switch p.Version { @@ -463,7 +470,7 @@ func (p Prefix) Sum(data []byte) (*Cid, error) { case 1: return NewCidV1(p.Codec, hash), nil default: - return nil, fmt.Errorf("invalid cid version") + return EmptyCid, fmt.Errorf("invalid cid version") } } diff --git a/cid_fuzz.go b/cid_fuzz.go index 357e907..99842b5 100644 --- a/cid_fuzz.go +++ b/cid_fuzz.go @@ -23,7 +23,7 @@ func Fuzz(data []byte) int { if err != nil { panic(err.Error()) } - cid2 := &Cid{} + cid2 := Cid{} err = cid2.UnmarshalJSON(json) if err != nil { panic(err.Error()) diff --git a/cid_test.go b/cid_test.go index b7bdd89..049469f 100644 --- a/cid_test.go +++ b/cid_test.go @@ -35,16 +35,16 @@ var tCodecs = map[uint64]string{ ZcashTx: "zcash-tx", } -func assertEqual(t *testing.T, a, b *Cid) { - if a.codec != b.codec { +func assertEqual(t *testing.T, a, b Cid) { + if a.codec() != b.codec() { t.Fatal("mismatch on type") } - if a.version != b.version { + if a.version() != b.version() { t.Fatal("mismatch on version") } - if !bytes.Equal(a.hash, b.hash) { + if !bytes.Equal(a.Hash(), b.Hash()) { t.Fatal("multihash mismatch") } } @@ -75,11 +75,7 @@ func TestBasicMarshaling(t *testing.T) { t.Fatal(err) } - cid := &Cid{ - codec: 7, - version: 1, - hash: h, - } + cid := newCid(1, 7, h) data := cid.Bytes() @@ -105,11 +101,7 @@ func TestBasesMarshaling(t *testing.T) { t.Fatal(err) } - cid := &Cid{ - codec: 7, - version: 1, - hash: h, - } + cid := newCid(1, 7, h) data := cid.Bytes() @@ -168,12 +160,12 @@ func TestV0Handling(t *testing.T) { t.Fatal(err) } - if cid.version != 0 { + if cid.version() != 0 { t.Fatal("should have gotten version 0 cid") } - if cid.hash.B58String() != old { - t.Fatal("marshaling roundtrip failed") + if cid.Hash().B58String() != old { + t.Fatalf("marshaling roundtrip failed: %s != %s", cid.Hash().B58String(), old) } if cid.String() != old { @@ -279,9 +271,7 @@ func TestPrefixRoundtrip(t *testing.T) { func Test16BytesVarint(t *testing.T) { data := []byte("this is some test content") hash, _ := mh.Sum(data, mh.SHA2_256, -1) - c := NewCidV1(DagCBOR, hash) - - c.codec = 1 << 63 + c := newCid(1, 1<<63, hash) _ = c.Bytes() } @@ -324,8 +314,8 @@ func TestParse(t *testing.T) { if err != nil { return err } - if cid.version != 0 { - return fmt.Errorf("expected version 0, got %s", string(cid.version)) + if cid.version() != 0 { + return fmt.Errorf("expected version 0, got %s", string(cid.version())) } actual := cid.Hash().B58String() if actual != expected { @@ -383,3 +373,39 @@ func TestFromJson(t *testing.T) { t.Fatal("json parsing failed") } } + +func BenchmarkStringV1(b *testing.B) { + data := []byte("this is some test content") + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + cid := NewCidV1(Raw, hash) + + b.ReportAllocs() + b.ResetTimer() + + count := 0 + for i := 0; i < b.N; i++ { + count += len(cid.String()) + } + if count != 49*b.N { + b.FailNow() + } +} + +// making sure we don't allocate when returning bytes +func BenchmarkBytesV1(b *testing.B) { + data := []byte("this is some test content") + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + cid := NewCidV1(Raw, hash) + + b.ReportAllocs() + b.ResetTimer() + + count := 0 + for i := 0; i < b.N; i++ { + count += len(cid.Bytes()) + count += len([]byte(cid)) + } + if count != 36*2*b.N { + b.FailNow() + } +} diff --git a/set.go b/set.go index b801ade..ce5b6f8 100644 --- a/set.go +++ b/set.go @@ -3,28 +3,28 @@ package cid // Set is a implementation of a set of Cids, that is, a structure // to which holds a single copy of every Cids that is added to it. type Set struct { - set map[string]struct{} + set map[Cid]struct{} } // NewSet initializes and returns a new Set. func NewSet() *Set { - return &Set{set: make(map[string]struct{})} + return &Set{set: make(map[Cid]struct{})} } // Add puts a Cid in the Set. -func (s *Set) Add(c *Cid) { - s.set[string(c.Bytes())] = struct{}{} +func (s *Set) Add(c Cid) { + s.set[c] = struct{}{} } // Has returns if the Set contains a given Cid. -func (s *Set) Has(c *Cid) bool { - _, ok := s.set[string(c.Bytes())] +func (s *Set) Has(c Cid) bool { + _, ok := s.set[c] return ok } // Remove deletes a Cid from the Set. -func (s *Set) Remove(c *Cid) { - delete(s.set, string(c.Bytes())) +func (s *Set) Remove(c Cid) { + delete(s.set, c) } // Len returns how many elements the Set has. @@ -33,18 +33,17 @@ func (s *Set) Len() int { } // Keys returns the Cids in the set. -func (s *Set) Keys() []*Cid { - out := make([]*Cid, 0, len(s.set)) +func (s *Set) Keys() []Cid { + out := make([]Cid, 0, len(s.set)) for k := range s.set { - c, _ := Cast([]byte(k)) - out = append(out, c) + out = append(out, k) } return out } // Visit adds a Cid to the set only if it is // not in it already. -func (s *Set) Visit(c *Cid) bool { +func (s *Set) Visit(c Cid) bool { if !s.Has(c) { s.Add(c) return true @@ -55,7 +54,7 @@ func (s *Set) Visit(c *Cid) bool { // ForEach allows to run a custom function on each // Cid in the set. -func (s *Set) ForEach(f func(c *Cid) error) error { +func (s *Set) ForEach(f func(c Cid) error) error { for cs := range s.set { c, _ := Cast([]byte(cs)) err := f(c) diff --git a/set_test.go b/set_test.go new file mode 100644 index 0000000..01569a8 --- /dev/null +++ b/set_test.go @@ -0,0 +1,54 @@ +package cid + +import ( + "fmt" + "testing" + + mh "github.com/multiformats/go-multihash" +) + +func makeCid(i int) Cid { + data := []byte(fmt.Sprintf("this is some test content %d", i)) + hash, _ := mh.Sum(data, mh.SHA2_256, -1) + return NewCidV1(Raw, hash) +} + +func TestSetRemove(t *testing.T) { + s := NewSet() + + c1 := makeCid(1) + s.Add(c1) + + if !s.Has(c1) { + t.Fatal("failed to add cid") + } + + s.Remove(c1) + if s.Has(c1) { + t.Fatal("failed to remove cid") + } + + // make sure this doesn't fail, removing a removed one + s.Remove(c1) +} + +func BenchmarkSetVisit(b *testing.B) { + s := NewSet() + + cids := make([]Cid, b.N) + for i := 0; i < b.N; i++ { + cids[i] = makeCid(i) + } + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + s.Visit(cids[i]) + // twice to ensure we test the adding of an existing element + s.Visit(cids[i]) + } + if s.Len() != b.N { + b.FailNow() + } +}