Skip to content

Commit

Permalink
feat: Has() and Get() will respect StoreIdentityCIDs option
Browse files Browse the repository at this point in the history
When StoreIdentityCIDs is set, it will defer to the index to check whether the
blocks are in the CAR. When the CAR is a v1 and StoreIdentityCIDs is set, the
index will contain the identity CIDs. When it's a v2 with an existing index,
however that index was created will determine whether the identity CIDs are
that are in the CAR are found.

When StoreIdentityCIDs is not set, Has() will always return true and Get() will
always return the block.
  • Loading branch information
rvagg committed Sep 9, 2022
1 parent 1478bbd commit 02d658f
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 20 deletions.
46 changes: 32 additions & 14 deletions v2/blockstore/readonly.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,23 @@ func (b *ReadOnly) DeleteBlock(_ context.Context, _ cid.Cid) error {
}

// Has indicates if the store contains a block that corresponds to the given key.
// This function always returns true for any given key with multihash.IDENTITY code.
// This function always returns true for any given key with multihash.IDENTITY
// code unless the StoreIdentityCIDs option is on, in which case it will defer
// to the index to check for the existence of the block; the index may or may
// not contain identity CIDs included in this CAR, depending on whether
// StoreIdentityCIDs was on when the index was created. If the CAR is a CARv1
// and StoreIdentityCIDs is on, then the index will contain identity CIDs and
// this will always return true.
func (b *ReadOnly) Has(ctx context.Context, key cid.Cid) (bool, error) {
// Check if the given CID has multihash.IDENTITY code
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
if _, ok, err := isIdentity(key); err != nil {
return false, err
} else if ok {
return true, nil
if !b.opts.StoreIdentityCIDs {
// If we don't store identity CIDs then we can return them straight away as if they are here,
// otherwise we need to check for their existence.
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
if _, ok, err := isIdentity(key); err != nil {
return false, err
} else if ok {
return true, nil
}
}

b.mu.RLock()
Expand Down Expand Up @@ -269,14 +278,23 @@ func (b *ReadOnly) Has(ctx context.Context, key cid.Cid) (bool, error) {
}

// Get gets a block corresponding to the given key.
// This API will always return true if the given key has multihash.IDENTITY code.
// This function always returns the block for any given key with
// multihash.IDENTITY code unless the StoreIdentityCIDs option is on, in which
// case it will defer to the index to check for the existence of the block; the
// index may or may not contain identity CIDs included in this CAR, depending on
// whether StoreIdentityCIDs was on when the index was created. If the CAR is a
// CARv1 and StoreIdentityCIDs is on, then the index will contain identity CIDs
// and this will always return true.
func (b *ReadOnly) Get(ctx context.Context, key cid.Cid) (blocks.Block, error) {
// Check if the given CID has multihash.IDENTITY code
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
if digest, ok, err := isIdentity(key); err != nil {
return nil, err
} else if ok {
return blocks.NewBlockWithCid(digest, key)
if !b.opts.StoreIdentityCIDs {
// If we don't store identity CIDs then we can return them straight away as if they are here,
// otherwise we need to check for their existence.
// Note, we do this without locking, since there is no shared information to lock for in order to perform the check.
if digest, ok, err := isIdentity(key); err != nil {
return nil, err
} else if ok {
return blocks.NewBlockWithCid(digest, key)
}
}

b.mu.RLock()
Expand Down
39 changes: 35 additions & 4 deletions v2/blockstore/readonly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/ipfs/go-merkledag"
carv2 "github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/internal/carv1"
"github.com/multiformats/go-multicodec"
"github.com/stretchr/testify/require"
)

Expand All @@ -33,31 +34,53 @@ func TestReadOnly(t *testing.T) {
name string
v1OrV2path string
opts []carv2.Option
noIdCids bool
}{
{
"OpenedWithCarV1",
"../testdata/sample-v1.car",
[]carv2.Option{UseWholeCIDs(true), carv2.StoreIdentityCIDs(true)},
// index is made, but identity CIDs are included so they'll be found
false,
},
{
"OpenedWithCarV1_NoIdentityCID",
"../testdata/sample-v1.car",
[]carv2.Option{UseWholeCIDs(true)},
// index is made, identity CIDs are not included, but we always short-circuit when StoreIdentityCIDs(false)
false,
},
{
"OpenedWithCarV2",
"../testdata/sample-wrapped-v2.car",
[]carv2.Option{UseWholeCIDs(true), carv2.StoreIdentityCIDs(true)},
// index already exists, but was made without identity CIDs, but opening with StoreIdentityCIDs(true) means we check the index
true,
},
{
"OpenedWithCarV2_NoIdentityCID",
"../testdata/sample-wrapped-v2.car",
[]carv2.Option{UseWholeCIDs(true)},
// index already exists, it was made without identity CIDs, but we always short-circuit when StoreIdentityCIDs(false)
false,
},
{
"OpenedWithCarV1ZeroLenSection",
"../testdata/sample-v1-with-zero-len-section.car",
[]carv2.Option{UseWholeCIDs(true), carv2.ZeroLengthSectionAsEOF(true)},
false,
},
{
"OpenedWithAnotherCarV1ZeroLenSection",
"../testdata/sample-v1-with-zero-len-section2.car",
[]carv2.Option{UseWholeCIDs(true), carv2.ZeroLengthSectionAsEOF(true)},
false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx := context.TODO()

subject, err := OpenReadOnly(tt.v1OrV2path, tt.opts...)
require.NoError(t, err)
t.Cleanup(func() { require.NoError(t, subject.Close()) })
Expand Down Expand Up @@ -89,7 +112,13 @@ func TestReadOnly(t *testing.T) {
// Assert blockstore contains key.
has, err := subject.Has(ctx, key)
require.NoError(t, err)
require.True(t, has)
if key.Prefix().MhType == uint64(multicodec.Identity) && tt.noIdCids {
// fixture wasn't made with StoreIdentityCIDs, but we opened it with StoreIdentityCIDs,
// so they aren't there to find
require.False(t, has)
} else {
require.True(t, has)
}

// Assert size matches block raw data length.
gotSize, err := subject.GetSize(ctx, key)
Expand All @@ -98,9 +127,11 @@ func TestReadOnly(t *testing.T) {
require.Equal(t, wantSize, gotSize)

// Assert block itself matches v1 payload block.
gotBlock, err := subject.Get(ctx, key)
require.NoError(t, err)
require.Equal(t, wantBlock, gotBlock)
if has {
gotBlock, err := subject.Get(ctx, key)
require.NoError(t, err)
require.Equal(t, wantBlock, gotBlock)
}

// Assert write operations error
require.Error(t, subject.Put(ctx, wantBlock))
Expand Down
4 changes: 4 additions & 0 deletions v2/index_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ func GenerateIndex(v1r io.Reader, opts ...Option) (index.Index, error) {
// LoadIndex populates idx with index records generated from r.
// The r may be in CARv1 or CARv2 format.
//
// If the StoreIdentityCIDs option is set when calling LoadIndex, identity
// CIDs will be included in the index. By default this option is off, and
// identity CIDs will not be included in the index.
//
// Note, the index is re-generated every time even if r is in CARv2 format and already has an index.
// To read existing index when available see ReadOrGenerateIndex.
func LoadIndex(idx index.Index, r io.Reader, opts ...Option) error {
Expand Down
11 changes: 9 additions & 2 deletions v2/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,15 @@ func WithoutIndex() Option {

// StoreIdentityCIDs sets whether to persist sections that are referenced by
// CIDs with multihash.IDENTITY digest.
// When writing CAR files with this option,
// Characteristics.IsFullyIndexed will be set.
// When writing CAR files with this option, Characteristics.IsFullyIndexed will
// be set.
//
// By default, the blockstore interface will always return true for Has() called
// with identity CIDs, but when this option is turned on, it will defer to the
// index.
//
// When creating an index (or loading a CARv1 as a blockstore), when this option
// is on, identity CIDs will be included in the index.
//
// This option is disabled by default.
func StoreIdentityCIDs(b bool) Option {
Expand Down

0 comments on commit 02d658f

Please sign in to comment.