Skip to content

Commit

Permalink
refactor: use Dag Size instead of UnixFS Size in Listing
Browse files Browse the repository at this point in the history
  • Loading branch information
hacdias committed Dec 8, 2022
1 parent 5e5d15a commit 233ff43
Show file tree
Hide file tree
Showing 9 changed files with 82 additions and 110 deletions.
7 changes: 5 additions & 2 deletions assets/dir-index-html/dir-index.html
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,13 @@
{{ .Hash }}
</div>
{{ end }}
<p style="margin-bottom: 0">
<a href="?format=json">Preview as JSON.</a> Download as: <a href="?format=car">CAR</a>, <a href="?format=tar">TAR</a>, <a href="?format=dag-json">DAG-JSON</a>, <a href="?format=dag-cbor">DAG-CBOR</a>.
</p>
</div>
{{ if .Size }}
<div class="no-linebreak flex-shrink-1 ml-auto">
<strong>&nbsp;{{ .Size }}</strong>
<strong title="Cumulative Object Size">&nbsp;{{ .Size }}</strong>
</div>
{{ end }}
</div>
Expand Down Expand Up @@ -89,7 +92,7 @@
</a>
{{ end }}
</td>
<td class="no-linebreak">{{ .Size }}</td>
<td class="no-linebreak" title="Cumulative Object Size">{{ .Size }}</td>
</tr>
{{ end }}
</table>
Expand Down
7 changes: 5 additions & 2 deletions assets/dir-index-html/src/dir-index.html
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,13 @@
{{ .Hash }}
</div>
{{ end }}
<p style="margin-bottom: 0">
<a href="?format=json">Preview as JSON.</a> Download as: <a href="?format=car">CAR</a>, <a href="?format=tar">TAR</a>, <a href="?format=dag-json">DAG-JSON</a>, <a href="?format=dag-cbor">DAG-CBOR</a>.
</p>
</div>
{{ if .Size }}
<div class="no-linebreak flex-shrink-1 ml-auto">
<strong>&nbsp;{{ .Size }}</strong>
<strong title="Cumulative Object Size">&nbsp;{{ .Size }}</strong>
</div>
{{ end }}
</div>
Expand Down Expand Up @@ -88,7 +91,7 @@
</a>
{{ end }}
</td>
<td class="no-linebreak">{{ .Size }}</td>
<td class="no-linebreak" title="Cumulative Object Size">{{ .Size }}</td>
</tr>
{{ end }}
</table>
Expand Down
17 changes: 8 additions & 9 deletions assets/dir-index-html/test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ const templateFile = "../dir-index.html"

// Copied from go-ipfs/core/corehttp/gateway_indexPage.go
type listingTemplateData struct {
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
FastDirIndexThreshold int
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
}

type directoryItem struct {
Expand Down
8 changes: 0 additions & 8 deletions config/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,6 @@ type Gateway struct {
// PathPrefixes was removed: https://github.com/ipfs/go-ipfs/issues/7702
PathPrefixes []string

// FastDirIndexThreshold is the maximum number of items in a directory
// before the Gateway switches to a shallow, faster listing which only
// requires the root node. This allows for listing big directories fast,
// without the linear slowdown caused by reading size metadata from child
// nodes.
// Setting to 0 will enable fast listings for all directories.
FastDirIndexThreshold *OptionalInteger `json:",omitempty"`

// FIXME: Not yet implemented: https://github.com/ipfs/kubo/issues/8059
APICommands []string

Expand Down
10 changes: 4 additions & 6 deletions core/corehttp/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ import (
)

type GatewayConfig struct {
Headers map[string][]string
Writable bool
FastDirIndexThreshold int
Headers map[string][]string
Writable bool
}

// NodeAPI defines the minimal set of API services required by a gateway handler
Expand Down Expand Up @@ -83,9 +82,8 @@ func GatewayOption(writable bool, paths ...string) ServeOption {
}

gateway := NewGatewayHandler(GatewayConfig{
Headers: headers,
Writable: writable,
FastDirIndexThreshold: int(cfg.Gateway.FastDirIndexThreshold.WithDefault(100)),
Headers: headers,
Writable: writable,
}, api, offlineAPI)

gateway = otelhttp.NewHandler(gateway, "Gateway.Request")
Expand Down
89 changes: 52 additions & 37 deletions core/corehttp/gateway_handler_unixfs_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ import (
"github.com/dustin/go-humanize"
cid "github.com/ipfs/go-cid"
files "github.com/ipfs/go-ipfs-files"
format "github.com/ipfs/go-ipld-format"
merkledag "github.com/ipfs/go-merkledag"
path "github.com/ipfs/go-path"
"github.com/ipfs/go-path/resolver"
options "github.com/ipfs/interface-go-ipfs-core/options"
"github.com/ipfs/go-unixfs"
"github.com/ipfs/go-unixfs/hamt"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"github.com/ipfs/kubo/assets"
"github.com/ipfs/kubo/tracing"
Expand Down Expand Up @@ -105,25 +108,20 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
return
}

// Optimization 1:
// List children without fetching their root blocks (fast, but no size info)
results, err := i.api.Unixfs().Ls(ctx, resolvedPath, options.Unixfs.ResolveChildren(false))
// Optimization: use Dag.Get to fetch the child links of this directory
// instead of Unixfs().Ls. Dag.Get is faster and each link also carries a
// Size field that is good enough for a directory listing.
links, err := i.getUnixFsLinks(ctx, resolvedPath.Cid())
if err != nil {
internalWebError(w, err)
return
}

// storage for directory listing
dirListing := make([]directoryItem, 0, len(results))

for link := range results {
if link.Err != nil {
internalWebError(w, err)
return
}
dirListing := make([]directoryItem, 0, len(links))
for _, link := range links {
hash := link.Cid.String()
di := directoryItem{
Size: "", // no size because we did not fetch child nodes
Size: humanize.Bytes(uint64(link.Size)),
Name: link.Name,
Path: gopath.Join(originalURLPath, link.Name),
Hash: hash,
Expand All @@ -132,21 +130,6 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
dirListing = append(dirListing, di)
}

// Optimization 2: fetch sizes only for dirs below FastDirIndexThreshold
if len(dirListing) < i.config.FastDirIndexThreshold {
dirit := dir.Entries()
linkNo := 0
for dirit.Next() {
size := "?"
if s, err := dirit.Node().Size(); err == nil {
// Size may not be defined/supported. Continue anyways.
size = humanize.Bytes(uint64(s))
}
dirListing[linkNo].Size = size
linkNo++
}
}

// construct the correct back link
// https://github.com/ipfs/kubo/issues/1365
backLink := originalURLPath
Expand Down Expand Up @@ -195,15 +178,14 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit

// See comment above where originalUrlPath is declared.
tplData := listingTemplateData{
GatewayURL: gwURL,
DNSLink: dnslink,
Listing: dirListing,
Size: size,
Path: contentPath.String(),
Breadcrumbs: breadcrumbs(contentPath.String(), dnslink),
BackLink: backLink,
Hash: hash,
FastDirIndexThreshold: i.config.FastDirIndexThreshold,
GatewayURL: gwURL,
DNSLink: dnslink,
Listing: dirListing,
Size: size,
Path: contentPath.String(),
Breadcrumbs: breadcrumbs(contentPath.String(), dnslink),
BackLink: backLink,
Hash: hash,
}

logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash)
Expand All @@ -220,3 +202,36 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
// getDirListingEtag builds the quoted Etag value used for a generated
// directory index page. The value combines assets.AssetHash with the string
// form of the directory CID, so it changes when either of them changes.
func getDirListingEtag(dirCid cid.Cid) string {
	etag := `"DirIndex-` + assets.AssetHash
	etag += `_CID-` + dirCid.String()
	etag += `"`
	return etag
}

// getUnixFsLinks returns the links of the node identified by cid.
//
// The node is fetched with Dag().Get. If it is not a *merkledag.ProtoNode,
// its own links are returned as-is. Otherwise its data is decoded as a UnixFS
// node: when the node is a HAMT shard, the links are enumerated through the
// hamt package (Shard.EnumLinks) rather than read from obj.Links(); for any
// other UnixFS type the node's own links are returned.
//
// Errors from fetching the node, decoding its UnixFS data, loading the shard,
// or enumerating the shard are returned unchanged together with a nil slice.
func (i *gatewayHandler) getUnixFsLinks(ctx context.Context, cid cid.Cid) ([]*format.Link, error) {
	obj, err := i.api.Dag().Get(ctx, cid)
	if err != nil {
		return nil, err
	}

	// Only ProtoNodes carry UnixFS data that can be inspected below.
	protoNode, ok := obj.(*merkledag.ProtoNode)
	if !ok {
		return obj.Links(), nil
	}

	fsNode, err := unixfs.FSNodeFromBytes(protoNode.Data())
	if err != nil {
		return nil, err
	}

	// Anything that is not a HAMT shard exposes its entries directly.
	if fsNode.Type() != unixfs.THAMTShard {
		return obj.Links(), nil
	}

	shard, err := hamt.NewHamtFromDag(i.api.Dag(), obj)
	if err != nil {
		return nil, err
	}

	links, err := shard.EnumLinks(ctx)
	if err != nil {
		return nil, err
	}
	return links, nil
}
17 changes: 8 additions & 9 deletions core/corehttp/gateway_indexPage.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ import (

// structs for directory listing
type listingTemplateData struct {
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
FastDirIndexThreshold int
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
}

type directoryItem struct {
Expand Down
15 changes: 0 additions & 15 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ config file at runtime.
- [`Gateway.NoDNSLink`](#gatewaynodnslink)
- [`Gateway.HTTPHeaders`](#gatewayhttpheaders)
- [`Gateway.RootRedirect`](#gatewayrootredirect)
- [`Gateway.FastDirIndexThreshold`](#gatewayfastdirindexthreshold)
- [`Gateway.Writable`](#gatewaywritable)
- [`Gateway.PathPrefixes`](#gatewaypathprefixes)
- [`Gateway.PublicGateways`](#gatewaypublicgateways)
Expand Down Expand Up @@ -674,20 +673,6 @@ Default: `""`

Type: `string` (url)

### `Gateway.FastDirIndexThreshold`

The maximum number of items in a directory before the Gateway switches
to a shallow, faster listing which only requires the root node.

This allows for fast listings of big directories, without the linear slowdown caused
by reading size metadata from child nodes.

Setting to 0 will enable fast listings for all directories.

Default: `100`

Type: `optionalInteger`

### `Gateway.Writable`

A boolean to configure whether the gateway is writeable or not.
Expand Down
22 changes: 0 additions & 22 deletions test/sharness/t0115-gateway-dir-listing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -163,28 +163,6 @@ test_expect_success "dnslink gw: hash column should be a CID link to cid.ipfs.te
test_should_contain "<a class=\"ipfs-hash\" translate=\"no\" href=\"https://cid.ipfs.tech/#$FILE_CID\" target=\"_blank\" rel=\"noreferrer noopener\">" list_response
'

## ============================================================================
## Test dir listing of a big directory
## ============================================================================

test_expect_success "dir listing should resolve child sizes if under Gateway.FastDirIndexThreshold" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response &&
test_should_contain "/ipfs/${FILE_CID}?filename" list_response &&
test_should_contain ">${FILE_SIZE} B</td>" list_response
'

# force fast dir index for all responses
ipfs config --json Gateway.FastDirIndexThreshold 0
# restart daemon to apply config changes
test_kill_ipfs_daemon
test_launch_ipfs_daemon

test_expect_success "dir listing should not resolve child sizes beyond Gateway.FastDirIndexThreshold" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response &&
test_should_contain "/ipfs/${FILE_CID}?filename" list_response &&
test_should_not_contain ">${FILE_SIZE} B</td>" list_response
'

## ============================================================================
## End of tests, cleanup
## ============================================================================
Expand Down

0 comments on commit 233ff43

Please sign in to comment.