diff --git a/v2/blockstore/doc.go b/v2/blockstore/doc.go index 6b96b7a6..450aee11 100644 --- a/v2/blockstore/doc.go +++ b/v2/blockstore/doc.go @@ -22,7 +22,7 @@ // * blockstore.Has will always return true. // * blockstore.Get will always succeed, returning the multihash digest of the given CID. // * blockstore.GetSize will always succeed, returning the multihash digest length of the given CID. -// * blockstore.Put and blockstore.PutMany will always succeed without performing any operation. +// * blockstore.Put and blockstore.PutMany will always succeed without performing any operation unless car.IncludeIdentityCIDs is enabled. // // See: https://pkg.go.dev/github.com/ipfs/go-ipfs-blockstore#NewIdStore package blockstore diff --git a/v2/blockstore/readonly_test.go b/v2/blockstore/readonly_test.go index 129e6b0d..afe3a68c 100644 --- a/v2/blockstore/readonly_test.go +++ b/v2/blockstore/readonly_test.go @@ -32,48 +32,50 @@ func TestReadOnly(t *testing.T) { name string v1OrV2path string opts []carv2.Option - v1r *carv1.CarReader }{ { "OpenedWithCarV1", "../testdata/sample-v1.car", - []carv2.Option{UseWholeCIDs(true)}, - newV1ReaderFromV1File(t, "../testdata/sample-v1.car", false), + []carv2.Option{UseWholeCIDs(true), carv2.IncludeIdentityCIDs(true)}, }, { "OpenedWithCarV2", "../testdata/sample-wrapped-v2.car", - []carv2.Option{UseWholeCIDs(true)}, - newV1ReaderFromV2File(t, "../testdata/sample-wrapped-v2.car", false), + []carv2.Option{UseWholeCIDs(true), carv2.IncludeIdentityCIDs(true)}, }, { "OpenedWithCarV1ZeroLenSection", "../testdata/sample-v1-with-zero-len-section.car", []carv2.Option{UseWholeCIDs(true), carv2.ZeroLengthSectionAsEOF(true)}, - newV1ReaderFromV1File(t, "../testdata/sample-v1-with-zero-len-section.car", true), }, { "OpenedWithAnotherCarV1ZeroLenSection", "../testdata/sample-v1-with-zero-len-section2.car", []carv2.Option{UseWholeCIDs(true), carv2.ZeroLengthSectionAsEOF(true)}, - newV1ReaderFromV1File(t, 
"../testdata/sample-v1-with-zero-len-section2.car", true), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { subject, err := OpenReadOnly(tt.v1OrV2path, tt.opts...) + require.NoError(t, err) t.Cleanup(func() { require.NoError(t, subject.Close()) }) + + f, err := os.Open(tt.v1OrV2path) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, f.Close()) }) + + reader, err := carv2.NewBlockReader(f, tt.opts...) require.NoError(t, err) // Assert roots match v1 payload. - wantRoots := tt.v1r.Header.Roots + wantRoots := reader.Roots gotRoots, err := subject.Roots() require.NoError(t, err) require.Equal(t, wantRoots, gotRoots) var wantCids []cid.Cid for { - wantBlock, err := tt.v1r.Next() + wantBlock, err := reader.Next() if err == io.EOF { break } diff --git a/v2/blockstore/readwrite.go b/v2/blockstore/readwrite.go index 4b311908..657fcc7b 100644 --- a/v2/blockstore/readwrite.go +++ b/v2/blockstore/readwrite.go @@ -295,11 +295,23 @@ func (b *ReadWrite) PutMany(blks []blocks.Block) error { for _, bl := range blks { c := bl.Cid() - // Check for IDENTITY CID. If IDENTITY, ignore and move to the next block. - if _, ok, err := isIdentity(c); err != nil { - return err - } else if ok { - continue + // If IncludeIdentityCIDs option is disabled then treat IDENTITY CIDs like IdStore. + if !b.opts.IncludeIdentityCIDs { + // Check for IDENTITY CID. If IDENTITY, ignore and move to the next block. + if _, ok, err := isIdentity(c); err != nil { + return err + } else if ok { + continue + } + } + + // Check if its size is too big. + // If larger than maximum allowed size, return error. + // Note, we need to check this regardless of whether we have IDENTITY CID or not. + // Since multhihash codes other than IDENTITY can result in large digests. 
+ cSize := uint64(len(c.Bytes())) + if cSize > b.opts.MaxIndexCidSize { + return carv2.NewErrCidTooLarge(b.opts.MaxIndexCidSize, cSize) } if !b.opts.BlockstoreAllowDuplicatePuts { @@ -351,6 +363,7 @@ func (b *ReadWrite) Finalize() error { // TODO check if add index option is set and don't write the index then set index offset to zero. b.header = b.header.WithDataSize(uint64(b.dataWriter.Position())) + b.header.Characteristics.SetFullyIndexed(b.opts.IncludeIdentityCIDs) // Note that we can't use b.Close here, as that tries to grab the same // mutex we're holding here. diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index cfdad464..2aefdd65 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -2,6 +2,7 @@ package blockstore_test import ( "context" + "crypto/sha512" "fmt" "io" "io/ioutil" @@ -12,21 +13,19 @@ import ( "testing" "time" + blocks "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + ipfsblockstore "github.com/ipfs/go-ipfs-blockstore" + cbor "github.com/ipfs/go-ipld-cbor" "github.com/ipfs/go-merkledag" - carv2 "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/blockstore" "github.com/ipld/go-car/v2/index" - "github.com/stretchr/testify/assert" - + "github.com/ipld/go-car/v2/internal/carv1" + "github.com/multiformats/go-multicodec" "github.com/multiformats/go-multihash" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - - ipfsblockstore "github.com/ipfs/go-ipfs-blockstore" - "github.com/ipld/go-car/v2/blockstore" - - blocks "github.com/ipfs/go-block-format" - "github.com/ipfs/go-cid" - "github.com/ipld/go-car/v2/internal/carv1" ) var ( @@ -688,3 +687,165 @@ func TestReadWriteErrorAfterClose(t *testing.T) { // in progress. 
} } + +func TestOpenReadWrite_WritesIdentityCIDsWhenOptionIsEnabled(t *testing.T) { + path := filepath.Join(t.TempDir(), "readwrite-with-id-enabled.car") + subject, err := blockstore.OpenReadWrite(path, []cid.Cid{}, carv2.IncludeIdentityCIDs(true)) + require.NoError(t, err) + + data := []byte("fish") + idmh, err := multihash.Sum(data, multihash.IDENTITY, -1) + require.NoError(t, err) + idCid := cid.NewCidV1(uint64(multicodec.Raw), idmh) + + idBlock, err := blocks.NewBlockWithCid(data, idCid) + require.NoError(t, err) + err = subject.Put(idBlock) + require.NoError(t, err) + + has, err := subject.Has(idCid) + require.NoError(t, err) + require.True(t, has) + + gotBlock, err := subject.Get(idCid) + require.NoError(t, err) + require.Equal(t, idBlock, gotBlock) + + keysChan, err := subject.AllKeysChan(context.Background()) + require.NoError(t, err) + var i int + for c := range keysChan { + i++ + require.Equal(t, idCid, c) + } + require.Equal(t, 1, i) + + err = subject.Finalize() + require.NoError(t, err) + + // Assert resulting CAR file indeed has the IDENTITY block. + f, err := os.Open(path) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, f.Close()) }) + + reader, err := carv2.NewBlockReader(f) + require.NoError(t, err) + + gotBlock, err = reader.Next() + require.NoError(t, err) + require.Equal(t, idBlock, gotBlock) + + next, err := reader.Next() + require.Equal(t, io.EOF, err) + require.Nil(t, next) + + // Assert the id is indexed. 
+ r, err := carv2.OpenReader(path) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, r.Close()) }) + require.True(t, r.Header.HasIndex()) + + ir := r.IndexReader() + require.NotNil(t, ir) + + gotIdx, err := index.ReadFrom(ir) + require.NoError(t, err) + + // Determine expected offset as the length of header plus one + header, err := carv1.ReadHeader(r.DataReader()) + require.NoError(t, err) + object, err := cbor.DumpObject(header) + require.NoError(t, err) + expectedOffset := len(object) + 1 + + // Assert index is iterable and has exactly one record with expected multihash and offset. + switch idx := gotIdx.(type) { + case index.IterableIndex: + var i int + err := idx.ForEach(func(mh multihash.Multihash, offset uint64) error { + i++ + require.Equal(t, idmh, mh) + require.Equal(t, uint64(expectedOffset), offset) + return nil + }) + require.NoError(t, err) + require.Equal(t, 1, i) + default: + require.Failf(t, "unexpected index type", "wanted %v but got %v", multicodec.CarMultihashIndexSorted, idx.Codec()) + } +} + +func TestOpenReadWrite_ErrorsWhenWritingTooLargeOfACid(t *testing.T) { + maxAllowedCidSize := uint64(2) + path := filepath.Join(t.TempDir(), "readwrite-with-id-enabled-too-large.car") + subject, err := blockstore.OpenReadWrite(path, []cid.Cid{}, carv2.MaxIndexCidSize(maxAllowedCidSize)) + t.Cleanup(subject.Discard) + require.NoError(t, err) + + data := []byte("monsterlobster") + mh, err := multihash.Sum(data, multihash.SHA2_256, -1) + require.NoError(t, err) + bigCid := cid.NewCidV1(uint64(multicodec.Raw), mh) + bigCidLen := uint64(bigCid.ByteLen()) + require.True(t, bigCidLen > maxAllowedCidSize) + + bigBlock, err := blocks.NewBlockWithCid(data, bigCid) + require.NoError(t, err) + err = subject.Put(bigBlock) + require.Equal(t, carv2.NewErrCidTooLarge(maxAllowedCidSize, bigCidLen), err) +} + +func TestReadWrite_ReWritingCARv1WithIdentityCidIsIdenticalToOriginalWithOptionsEnabled(t *testing.T) { + originalCARv1Path := 
"../testdata/sample-v1.car" + originalCarV1, err := os.Open(originalCARv1Path) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, originalCarV1.Close()) }) + + r, err := carv2.NewBlockReader(originalCarV1) + require.NoError(t, err) + + path := filepath.Join(t.TempDir(), "readwrite-from-carv1-with-id-enabled.car") + subject, err := blockstore.OpenReadWrite(path, r.Roots, carv2.IncludeIdentityCIDs(true)) + require.NoError(t, err) + var idCidCount int + for { + next, err := r.Next() + if err == io.EOF { + break + } + require.NoError(t, err) + if next.Cid().Prefix().MhType == multihash.IDENTITY { + idCidCount++ + } + err = subject.Put(next) + require.NoError(t, err) + } + require.NotZero(t, idCidCount) + err = subject.Finalize() + require.NoError(t, err) + + v2r, err := carv2.OpenReader(path) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, v2r.Close()) }) + + // Assert characteristics bit is set. + require.True(t, v2r.Header.Characteristics.IsFullyIndexed()) + + // Assert original CARv1 and generated innter CARv1 payload have the same SHA512 hash + // Note, we hash instead of comparing bytes to avoid excessive memory usage when sample CARv1 is large. + + hasher := sha512.New() + gotWritten, err := io.Copy(hasher, v2r.DataReader()) + require.NoError(t, err) + gotSum := hasher.Sum(nil) + + hasher.Reset() + _, err = originalCarV1.Seek(0, io.SeekStart) + require.NoError(t, err) + wantWritten, err := io.Copy(hasher, originalCarV1) + require.NoError(t, err) + wantSum := hasher.Sum(nil) + + require.Equal(t, wantWritten, gotWritten) + require.Equal(t, wantSum, gotSum) +} diff --git a/v2/car.go b/v2/car.go index d1268331..d101753b 100644 --- a/v2/car.go +++ b/v2/car.go @@ -43,6 +43,9 @@ type ( } ) +// fullyIndexedCharPos is the position of Characteristics.Hi bit that specifies whether the index is a catalog af all CIDs or not. +const fullyIndexedCharPos = 0 + // WriteTo writes this characteristics to the given w. 
func (c Characteristics) WriteTo(w io.Writer) (n int64, err error) { buf := make([]byte, 16) @@ -64,6 +67,37 @@ func (c *Characteristics) ReadFrom(r io.Reader) (int64, error) { return n, nil } +// IsFullyIndexed specifies whether the index of CARv2 represents a catalog of all CID segments. +// See IncludeIdentityCIDs +func (c *Characteristics) IsFullyIndexed() bool { + return isBitSet(c.Hi, fullyIndexedCharPos) +} + +// SetFullyIndexed sets whether the index of CARv2 represents a catalog of all CID segments. +func (c *Characteristics) SetFullyIndexed(b bool) { + if b { + c.Hi = setBit(c.Hi, fullyIndexedCharPos) + } else { + c.Hi = unsetBit(c.Hi, fullyIndexedCharPos) + } +} + +func setBit(n uint64, pos uint) uint64 { + n |= 1 << pos + return n +} + +func unsetBit(n uint64, pos uint) uint64 { + mask := uint64(^(1 << pos)) + n &= mask + return n +} + +func isBitSet(n uint64, pos uint) bool { + bit := n & (1 << pos) + return bit > 0 +} + // NewHeader instantiates a new CARv2 header, given the data size. 
func NewHeader(dataSize uint64) Header { header := Header{ diff --git a/v2/car_test.go b/v2/car_test.go index 4223a560..a7c22db6 100644 --- a/v2/car_test.go +++ b/v2/car_test.go @@ -2,8 +2,11 @@ package car_test import ( "bytes" + "encoding/binary" "testing" + "github.com/stretchr/testify/require" + carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/internal/carv1" "github.com/stretchr/testify/assert" @@ -199,3 +202,20 @@ func TestNewHeaderHasExpectedValues(t *testing.T) { got := carv2.NewHeader(wantCarV1Len) assert.Equal(t, want, got, "NewHeader got = %v, want = %v", got, want) } + +func TestCharacteristics_IncludesIdentityCid(t *testing.T) { + subject := carv2.Characteristics{} + require.False(t, subject.IsFullyIndexed()) + + subject.SetFullyIndexed(true) + b := make([]byte, 8) + binary.LittleEndian.PutUint64(b, subject.Hi) + require.Equal(t, byte(1), b[0]) + require.True(t, subject.IsFullyIndexed()) + + subject.SetFullyIndexed(false) + b = make([]byte, 8) + binary.LittleEndian.PutUint64(b, subject.Hi) + require.Equal(t, byte(0), b[0]) + require.False(t, subject.IsFullyIndexed()) +} diff --git a/v2/errors.go b/v2/errors.go new file mode 100644 index 00000000..37f5a3e3 --- /dev/null +++ b/v2/errors.go @@ -0,0 +1,28 @@ +package car + +import ( + "fmt" +) + +type ( + // ErrCidTooLarge signals that a CID is too large to include in CARv2 index. + ErrCidTooLarge error + + errCidTooLarge struct { + maxSize uint64 + currentSize uint64 + } +) + +// NewErrCidTooLarge constructs a new ErrCidTooLarge that signals a given CID is too large along +// with metadata about maximum allowed and actual size. 
+func NewErrCidTooLarge(maxSize, currentSize uint64) ErrCidTooLarge { + return &errCidTooLarge{ + maxSize: maxSize, + currentSize: currentSize, + } +} + +func (e *errCidTooLarge) Error() string { + return fmt.Sprintf("cid size is larger than max allowed (%d > %d)", e.currentSize, e.maxSize) +} diff --git a/v2/errors_test.go b/v2/errors_test.go new file mode 100644 index 00000000..5020a1cc --- /dev/null +++ b/v2/errors_test.go @@ -0,0 +1,12 @@ +package car + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNewErrCidTooLarge_ErrorContainsSizes(t *testing.T) { + subject := NewErrCidTooLarge(1413, 1414) + require.EqualError(t, subject, "cid size is larger than max allowed (1414 > 1413)") +} diff --git a/v2/index/index.go b/v2/index/index.go index 8abd1d3a..0b3ef3c6 100644 --- a/v2/index/index.go +++ b/v2/index/index.go @@ -31,9 +31,7 @@ type ( // implementations might index the entire CID, the entire multihash, or // just part of a multihash's digest. // - // In accordance with the CARv2 specification, Index will never contain information about CIDs - // with multihash.IDENTITY code. - // See: https://ipld.io/specs/transport/car/carv2/#index-format + // See: multicodec.CarIndexSorted, multicodec.CarMultihashIndexSorted Index interface { // Codec provides the multicodec code that the index implements. // @@ -47,6 +45,10 @@ type ( Unmarshal(r io.Reader) error // Load inserts a number of records into the index. + // Note that Index will load all given records. Any filtering of the records such as + // exclusion of CIDs with multihash.IDENTITY code must occur prior to calling this function. + // Further, the actual information extracted and indexed from the given records entirely + // depends on the concrete index implementation. 
Load([]Record) error // GetAll looks up all blocks matching a given CID, @@ -61,7 +63,7 @@ type ( } // IterableIndex extends Index in cases where the Index is able to - // provide an iterator for getting the list of all entries in the + // provide an iterator for getting the list of all multihashes in the // index. IterableIndex interface { Index diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index c6165ffa..6b6c5a68 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -207,12 +207,6 @@ func (m *multiWidthIndex) Load(items []Record) error { return err } - // Ignore records with IDENTITY as required by CARv2 spec. - // See: https://ipld.io/specs/transport/car/carv2/#index-format - if decHash.Code == multihash.IDENTITY { - continue - } - digest := decHash.Digest idx, ok := idxs[len(digest)] if !ok { diff --git a/v2/index/indexsorted_test.go b/v2/index/indexsorted_test.go index 46322e54..5c1ee449 100644 --- a/v2/index/indexsorted_test.go +++ b/v2/index/indexsorted_test.go @@ -4,9 +4,6 @@ import ( "encoding/binary" "testing" - "github.com/ipfs/go-cid" - "github.com/multiformats/go-multihash" - "github.com/ipfs/go-merkledag" "github.com/multiformats/go-multicodec" "github.com/stretchr/testify/require" @@ -65,42 +62,3 @@ func TestSingleWidthIndex_GetAll(t *testing.T) { require.NoError(t, err) require.Equal(t, 3, foundCount) } - -func TestIndexSorted_IgnoresIdentityCids(t *testing.T) { - data := []byte("🐟 in da 🌊d") - // Generate a record with IDENTITY multihash - idMh, err := multihash.Sum(data, multihash.IDENTITY, -1) - require.NoError(t, err) - idRec := Record{ - Cid: cid.NewCidV1(cid.Raw, idMh), - Offset: 1, - } - // Generate a record with non-IDENTITY multihash - nonIdMh, err := multihash.Sum(data, multihash.SHA2_256, -1) - require.NoError(t, err) - noIdRec := Record{ - Cid: cid.NewCidV1(cid.Raw, nonIdMh), - Offset: 2, - } - - subject := newSorted() - err = subject.Load([]Record{idRec, noIdRec}) - require.NoError(t, err) - - // Assert 
record with IDENTITY CID is not present. - err = subject.GetAll(idRec.Cid, func(u uint64) bool { - require.Fail(t, "no IDENTITY record shoul be found") - return false - }) - require.Equal(t, ErrNotFound, err) - - // Assert record with non-IDENTITY CID is indeed present. - var found bool - err = subject.GetAll(noIdRec.Cid, func(gotOffset uint64) bool { - found = true - require.Equal(t, noIdRec.Offset, gotOffset) - return false - }) - require.NoError(t, err) - require.True(t, found) -} diff --git a/v2/index/mhindexsorted.go b/v2/index/mhindexsorted.go index 6ae2c568..f81e3a94 100644 --- a/v2/index/mhindexsorted.go +++ b/v2/index/mhindexsorted.go @@ -17,7 +17,6 @@ var ( type ( // MultihashIndexSorted maps multihash code (i.e. hashing algorithm) to multiWidthCodedIndex. - // This index ignores any Record with multihash.IDENTITY. MultihashIndexSorted map[uint64]*multiWidthCodedIndex // multiWidthCodedIndex stores multihash code for each multiWidthIndex. multiWidthCodedIndex struct { @@ -123,10 +122,6 @@ func (m *MultihashIndexSorted) Load(records []Record) error { return err } code := dmh.Code - // Ignore IDENTITY multihash in the index. - if code == multihash.IDENTITY { - continue - } recsByCode, ok := byCode[code] if !ok { recsByCode = make([]Record, 0) diff --git a/v2/index/mhindexsorted_test.go b/v2/index/mhindexsorted_test.go index ced8a921..b5ef7b89 100644 --- a/v2/index/mhindexsorted_test.go +++ b/v2/index/mhindexsorted_test.go @@ -20,31 +20,6 @@ func TestMutilhashSortedIndex_Codec(t *testing.T) { require.Equal(t, multicodec.CarMultihashIndexSorted, subject.Codec()) } -func TestMultiWidthCodedIndex_LoadDoesNotLoadIdentityMultihash(t *testing.T) { - rng := rand.New(rand.NewSource(1413)) - identityRecords := generateIndexRecords(t, multihash.IDENTITY, rng) - nonIdentityRecords := generateIndexRecords(t, multihash.SHA2_256, rng) - records := append(identityRecords, nonIdentityRecords...) 
- - subject, err := index.New(multicodec.CarMultihashIndexSorted) - require.NoError(t, err) - err = subject.Load(records) - require.NoError(t, err) - - // Assert index does not contain any records with IDENTITY multihash code. - for _, r := range identityRecords { - wantCid := r.Cid - err = subject.GetAll(wantCid, func(o uint64) bool { - require.Fail(t, "subject should not contain any records with IDENTITY multihash code") - return false - }) - require.Equal(t, index.ErrNotFound, err) - } - - // Assert however, index does contain the non IDENTITY records. - requireContainsAll(t, subject, nonIdentityRecords) -} - func TestMultiWidthCodedIndex_MarshalUnmarshal(t *testing.T) { rng := rand.New(rand.NewSource(1413)) records := generateIndexRecords(t, multihash.SHA2_256, rng) diff --git a/v2/index_gen.go b/v2/index_gen.go index 4602add5..9d7209f7 100644 --- a/v2/index_gen.go +++ b/v2/index_gen.go @@ -9,15 +9,19 @@ import ( "github.com/ipld/go-car/v2/index" "github.com/ipld/go-car/v2/internal/carv1" internalio "github.com/ipld/go-car/v2/internal/io" + "github.com/multiformats/go-multihash" "github.com/multiformats/go-varint" ) // GenerateIndex generates index for a given car in v1 format. -// The generated index will be in multicodec.CarMultihashIndexSorted, the default index codec. // The index can be stored in serialized format using index.WriteTo. // See LoadIndex. func GenerateIndex(v1r io.Reader, opts ...Option) (index.Index, error) { - idx := index.NewMultihashSorted() + wopts := ApplyOptions(opts...) 
+ idx, err := index.New(wopts.IndexCodec) + if err != nil { + return nil, err + } if err := LoadIndex(idx, v1r, opts...); err != nil { return nil, err } @@ -76,7 +80,13 @@ func LoadIndex(idx index.Index, v1r io.Reader, opts ...Option) error { if err != nil { return err } - records = append(records, index.Record{Cid: c, Offset: uint64(sectionOffset)}) + + if c.Prefix().MhType != multihash.IDENTITY || o.IncludeIdentityCIDs { + if uint64(cidLen) > o.MaxIndexCidSize { + return NewErrCidTooLarge(o.MaxIndexCidSize, uint64(cidLen)) + } + records = append(records, index.Record{Cid: c, Offset: uint64(sectionOffset)}) + } // Seek to the next section by skipping the block. // The section length includes the CID, so subtract it. @@ -94,7 +104,7 @@ func LoadIndex(idx index.Index, v1r io.Reader, opts ...Option) error { } // GenerateIndexFromFile walks a car v1 file at the give path and generates an index of cid->byte offset. -// The index can be stored using index.Save into a file or serialized using index.WriteTo. +// The index can be stored using index.WriteTo. // See GenerateIndex. func GenerateIndexFromFile(path string) (index.Index, error) { f, err := os.Open(path) diff --git a/v2/options.go b/v2/options.go index ad554a79..e7337b8d 100644 --- a/v2/options.go +++ b/v2/options.go @@ -2,6 +2,9 @@ package car import "github.com/multiformats/go-multicodec" +// DefaultMaxIndexCidSize specifies the maximum size in bytes accepted as a section CID by CARv2 index. +const DefaultMaxIndexCidSize = 2 << 10 // 2 KiB + // Option describes an option which affects behavior when interacting with CAR files. 
type Option func(*Options) @@ -25,6 +28,8 @@ type Options struct { IndexPadding uint64 IndexCodec multicodec.Code ZeroLengthSectionAsEOF bool + MaxIndexCidSize uint64 + IncludeIdentityCIDs bool BlockstoreAllowDuplicatePuts bool BlockstoreUseWholeCIDs bool @@ -42,6 +47,9 @@ func ApplyOptions(opt ...Option) Options { if opts.IndexCodec == 0 { opts.IndexCodec = multicodec.CarMultihashIndexSorted } + if opts.MaxIndexCidSize == 0 { + opts.MaxIndexCidSize = DefaultMaxIndexCidSize + } return opts } @@ -75,3 +83,20 @@ func UseIndexCodec(c multicodec.Code) Option { o.IndexCodec = c } } + +// IncludeIdentityCIDs sets whether to persist sections that are referenced by +// CIDs with multihash.IDENTITY digest. +// This option is disabled by default. +func IncludeIdentityCIDs(b bool) Option { + return func(o *Options) { + o.IncludeIdentityCIDs = b + } +} + +// MaxIndexCidSize specifies the maximum allowed size for indexed CIDs in bytes. +// Indexing a CID larger than the allowed size results in an ErrCidTooLarge error. 
+func MaxIndexCidSize(s uint64) Option { + return func(o *Options) { + o.MaxIndexCidSize = s + } +} diff --git a/v2/options_test.go b/v2/options_test.go new file mode 100644 index 00000000..c001f2cd --- /dev/null +++ b/v2/options_test.go @@ -0,0 +1,41 @@ +package car_test + +import ( + "testing" + + carv2 "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/blockstore" + "github.com/multiformats/go-multicodec" + "github.com/stretchr/testify/require" +) + +func TestApplyOptions_SetsExpectedDefaults(t *testing.T) { + require.Equal(t, carv2.Options{ + IndexCodec: multicodec.CarMultihashIndexSorted, + MaxIndexCidSize: carv2.DefaultMaxIndexCidSize, + }, carv2.ApplyOptions()) +} + +func TestApplyOptions_AppliesOptions(t *testing.T) { + require.Equal(t, + carv2.Options{ + DataPadding: 123, + IndexPadding: 456, + IndexCodec: multicodec.CarIndexSorted, + ZeroLengthSectionAsEOF: true, + MaxIndexCidSize: 789, + IncludeIdentityCIDs: true, + BlockstoreAllowDuplicatePuts: true, + BlockstoreUseWholeCIDs: true, + }, + carv2.ApplyOptions( + carv2.UseDataPadding(123), + carv2.UseIndexPadding(456), + carv2.UseIndexCodec(multicodec.CarIndexSorted), + carv2.ZeroLengthSectionAsEOF(true), + carv2.MaxIndexCidSize(789), + carv2.IncludeIdentityCIDs(true), + blockstore.AllowDuplicatePuts(true), + blockstore.UseWholeCIDs(true), + )) +}