From 0599510d4100e6bbf67daa57df3fe88c932e73a1 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Fri, 2 Sep 2022 20:53:57 +1000 Subject: [PATCH] feat: Has() and Get() will respect StoreIdentityCIDs option When StoreIdentityCIDs is set, it will defer to the index to check whether the blocks are in the CAR. When the CAR is a v1 and StoreIdentityCIDs is set, the index will contain the identity CIDs. When it's a v2 with an existing index, however that index was created will determine whether the identity CIDs are that are in the CAR are found. When StoreIdentityCIDs is not set, Has() will always return true and Get() will always return the block. This commit was moved from ipld/go-car@02d658faa7dfbc01920fc42ad7210c3505d049f4 --- ipld/car/v2/blockstore/readonly.go | 46 +++++++++++++++++-------- ipld/car/v2/blockstore/readonly_test.go | 39 ++++++++++++++++++--- ipld/car/v2/index_gen.go | 4 +++ ipld/car/v2/options.go | 11 ++++-- 4 files changed, 80 insertions(+), 20 deletions(-) diff --git a/ipld/car/v2/blockstore/readonly.go b/ipld/car/v2/blockstore/readonly.go index 32c49046b..bfca69112 100644 --- a/ipld/car/v2/blockstore/readonly.go +++ b/ipld/car/v2/blockstore/readonly.go @@ -217,14 +217,23 @@ func (b *ReadOnly) DeleteBlock(_ context.Context, _ cid.Cid) error { } // Has indicates if the store contains a block that corresponds to the given key. -// This function always returns true for any given key with multihash.IDENTITY code. +// This function always returns true for any given key with multihash.IDENTITY +// code unless the StoreIdentityCIDs option is on, in which case it will defer +// to the index to check for the existence of the block; the index may or may +// not contain identity CIDs included in this CAR, depending on whether +// StoreIdentityCIDs was on when the index was created. If the CAR is a CARv1 +// and StoreIdentityCIDs is on, then the index will contain identity CIDs and +// this will always return true. func (b *ReadOnly) Has(ctx context.Context, key cid.Cid) (bool, error) { - // Check if the given CID has multihash.IDENTITY code - // Note, we do this without locking, since there is no shared information to lock for in order to perform the check. - if _, ok, err := isIdentity(key); err != nil { - return false, err - } else if ok { - return true, nil + if !b.opts.StoreIdentityCIDs { + // If we don't store identity CIDs then we can return them straight away as if they are here, + // otherwise we need to check for their existence. + // Note, we do this without locking, since there is no shared information to lock for in order to perform the check. + if _, ok, err := isIdentity(key); err != nil { + return false, err + } else if ok { + return true, nil + } } b.mu.RLock() @@ -269,14 +278,23 @@ func (b *ReadOnly) Has(ctx context.Context, key cid.Cid) (bool, error) { } // Get gets a block corresponding to the given key. -// This API will always return true if the given key has multihash.IDENTITY code. +// This function always returns the block for any given key with +// multihash.IDENTITY code unless the StoreIdentityCIDs option is on, in which +// case it will defer to the index to check for the existence of the block; the +// index may or may not contain identity CIDs included in this CAR, depending on +// whether StoreIdentityCIDs was on when the index was created. If the CAR is a +// CARv1 and StoreIdentityCIDs is on, then the index will contain identity CIDs +// and this will always return true. func (b *ReadOnly) Get(ctx context.Context, key cid.Cid) (blocks.Block, error) { - // Check if the given CID has multihash.IDENTITY code - // Note, we do this without locking, since there is no shared information to lock for in order to perform the check. - if digest, ok, err := isIdentity(key); err != nil { - return nil, err - } else if ok { - return blocks.NewBlockWithCid(digest, key) + if !b.opts.StoreIdentityCIDs { + // If we don't store identity CIDs then we can return them straight away as if they are here, + // otherwise we need to check for their existence. + // Note, we do this without locking, since there is no shared information to lock for in order to perform the check. + if digest, ok, err := isIdentity(key); err != nil { + return nil, err + } else if ok { + return blocks.NewBlockWithCid(digest, key) + } } b.mu.RLock() diff --git a/ipld/car/v2/blockstore/readonly_test.go b/ipld/car/v2/blockstore/readonly_test.go index a799d328f..6b1b009f8 100644 --- a/ipld/car/v2/blockstore/readonly_test.go +++ b/ipld/car/v2/blockstore/readonly_test.go @@ -14,6 +14,7 @@ import ( "github.com/ipfs/go-merkledag" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/internal/carv1" + "github.com/multiformats/go-multicodec" "github.com/stretchr/testify/require" ) @@ -33,31 +34,53 @@ func TestReadOnly(t *testing.T) { name string v1OrV2path string opts []carv2.Option + noIdCids bool }{ { "OpenedWithCarV1", "../testdata/sample-v1.car", []carv2.Option{UseWholeCIDs(true), carv2.StoreIdentityCIDs(true)}, + // index is made, but identity CIDs are included so they'll be found + false, + }, + { + "OpenedWithCarV1_NoIdentityCID", + "../testdata/sample-v1.car", + []carv2.Option{UseWholeCIDs(true)}, + // index is made, identity CIDs are not included, but we always short-circuit when StoreIdentityCIDs(false) + false, }, { "OpenedWithCarV2", "../testdata/sample-wrapped-v2.car", []carv2.Option{UseWholeCIDs(true), carv2.StoreIdentityCIDs(true)}, + // index already exists, but was made without identity CIDs, but opening with StoreIdentityCIDs(true) means we check the index + true, + }, + { + "OpenedWithCarV2_NoIdentityCID", + "../testdata/sample-wrapped-v2.car", + []carv2.Option{UseWholeCIDs(true)}, + // index already exists, it was made without identity CIDs, but we always short-circuit when StoreIdentityCIDs(false) + false, }, { "OpenedWithCarV1ZeroLenSection", "../testdata/sample-v1-with-zero-len-section.car", []carv2.Option{UseWholeCIDs(true), carv2.ZeroLengthSectionAsEOF(true)}, + false, }, { "OpenedWithAnotherCarV1ZeroLenSection", "../testdata/sample-v1-with-zero-len-section2.car", []carv2.Option{UseWholeCIDs(true), carv2.ZeroLengthSectionAsEOF(true)}, + false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := context.TODO() + subject, err := OpenReadOnly(tt.v1OrV2path, tt.opts...) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, subject.Close()) }) @@ -89,7 +112,13 @@ func TestReadOnly(t *testing.T) { // Assert blockstore contains key. has, err := subject.Has(ctx, key) require.NoError(t, err) - require.True(t, has) + if key.Prefix().MhType == uint64(multicodec.Identity) && tt.noIdCids { + // fixture wasn't made with StoreIdentityCIDs, but we opened it with StoreIdentityCIDs, + // so they aren't there to find + require.False(t, has) + } else { + require.True(t, has) + } // Assert size matches block raw data length. gotSize, err := subject.GetSize(ctx, key) @@ -98,9 +127,11 @@ func TestReadOnly(t *testing.T) { require.Equal(t, wantSize, gotSize) // Assert block itself matches v1 payload block. - gotBlock, err := subject.Get(ctx, key) - require.NoError(t, err) - require.Equal(t, wantBlock, gotBlock) + if has { + gotBlock, err := subject.Get(ctx, key) + require.NoError(t, err) + require.Equal(t, wantBlock, gotBlock) + } // Assert write operations error require.Error(t, subject.Put(ctx, wantBlock)) diff --git a/ipld/car/v2/index_gen.go b/ipld/car/v2/index_gen.go index b0b87453e..092ed434c 100644 --- a/ipld/car/v2/index_gen.go +++ b/ipld/car/v2/index_gen.go @@ -34,6 +34,10 @@ func GenerateIndex(v1r io.Reader, opts ...Option) (index.Index, error) { // LoadIndex populates idx with index records generated from r. // The r may be in CARv1 or CARv2 format. // +// If the StoreIdentityCIDs option is set when calling LoadIndex, identity +// CIDs will be included in the index. By default this option is off, and +// identity CIDs will not be included in the index. +// // Note, the index is re-generated every time even if r is in CARv2 format and already has an index. // To read existing index when available see ReadOrGenerateIndex. func LoadIndex(idx index.Index, r io.Reader, opts ...Option) error { diff --git a/ipld/car/v2/options.go b/ipld/car/v2/options.go index d2e526c42..8b5fe9b4e 100644 --- a/ipld/car/v2/options.go +++ b/ipld/car/v2/options.go @@ -127,8 +127,15 @@ func WithoutIndex() Option { // StoreIdentityCIDs sets whether to persist sections that are referenced by // CIDs with multihash.IDENTITY digest. -// When writing CAR files with this option, -// Characteristics.IsFullyIndexed will be set. +// When writing CAR files with this option, Characteristics.IsFullyIndexed will +// be set. +// +// By default, the blockstore interface will always return true for Has() called +// with identity CIDs, but when this option is turned on, it will defer to the +// index. +// +// When creating an index (or loading a CARv1 as a blockstore), when this option +// is on, identity CIDs will be included in the index. // // This option is disabled by default. func StoreIdentityCIDs(b bool) Option {