Skip to content

Commit

Permalink
feat: fast directory listings with DAG Size column (#9481)
Browse files Browse the repository at this point in the history
Co-authored-by: Marcin Rataj <lidel@lidel.org>
  • Loading branch information
hacdias and lidel authored Dec 12, 2022
1 parent 579175f commit 7bdb341
Show file tree
Hide file tree
Showing 15 changed files with 85 additions and 126 deletions.
4 changes: 2 additions & 2 deletions assets/dir-index-html/dir-index.html
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
</div>
{{ if .Size }}
<div class="no-linebreak flex-shrink-1 ml-auto">
<strong>&nbsp;{{ .Size }}</strong>
<strong title="Cumulative size of IPFS DAG (data + metadata)">&nbsp;{{ .Size }}</strong>
</div>
{{ end }}
</div>
Expand Down Expand Up @@ -89,7 +89,7 @@
</a>
{{ end }}
</td>
<td class="no-linebreak">{{ .Size }}</td>
<td class="no-linebreak" title="Cumulative size of IPFS DAG (data + metadata)">{{ .Size }}</td>
</tr>
{{ end }}
</table>
Expand Down
4 changes: 2 additions & 2 deletions assets/dir-index-html/src/dir-index.html
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
</div>
{{ if .Size }}
<div class="no-linebreak flex-shrink-1 ml-auto">
<strong>&nbsp;{{ .Size }}</strong>
<strong title="Cumulative size of IPFS DAG (data + metadata)">&nbsp;{{ .Size }}</strong>
</div>
{{ end }}
</div>
Expand Down Expand Up @@ -88,7 +88,7 @@
</a>
{{ end }}
</td>
<td class="no-linebreak">{{ .Size }}</td>
<td class="no-linebreak" title="Cumulative size of IPFS DAG (data + metadata)">{{ .Size }}</td>
</tr>
{{ end }}
</table>
Expand Down
17 changes: 8 additions & 9 deletions assets/dir-index-html/test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ const templateFile = "../dir-index.html"

// Copied from go-ipfs/core/corehttp/gateway_indexPage.go
type listingTemplateData struct {
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
FastDirIndexThreshold int
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
}

type directoryItem struct {
Expand Down
8 changes: 0 additions & 8 deletions config/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,6 @@ type Gateway struct {
// PathPrefixes was removed: https://github.com/ipfs/go-ipfs/issues/7702
PathPrefixes []string

// FastDirIndexThreshold is the maximum number of items in a directory
// before the Gateway switches to a shallow, faster listing which only
// requires the root node. This allows for listing big directories fast,
// without the linear slowdown caused by reading size metadata from child
// nodes.
// Setting to 0 will enable fast listings for all directories.
FastDirIndexThreshold *OptionalInteger `json:",omitempty"`

// FIXME: Not yet implemented: https://github.com/ipfs/kubo/issues/8059
APICommands []string

Expand Down
46 changes: 25 additions & 21 deletions core/coreapi/unixfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,32 +271,36 @@ func (api *UnixfsAPI) processLink(ctx context.Context, linkres ft.LinkResult, se
lnk.Type = coreiface.TFile
lnk.Size = linkres.Link.Size
case cid.DagProtobuf:
if !settings.ResolveChildren {
break
}

linkNode, err := linkres.Link.GetNode(ctx, api.dag)
if err != nil {
lnk.Err = err
break
}

if pn, ok := linkNode.(*merkledag.ProtoNode); ok {
d, err := ft.FSNodeFromBytes(pn.Data())
if settings.ResolveChildren {
linkNode, err := linkres.Link.GetNode(ctx, api.dag)
if err != nil {
lnk.Err = err
break
}
switch d.Type() {
case ft.TFile, ft.TRaw:
lnk.Type = coreiface.TFile
case ft.THAMTShard, ft.TDirectory, ft.TMetadata:
lnk.Type = coreiface.TDirectory
case ft.TSymlink:
lnk.Type = coreiface.TSymlink
lnk.Target = string(d.Data())

if pn, ok := linkNode.(*merkledag.ProtoNode); ok {
d, err := ft.FSNodeFromBytes(pn.Data())
if err != nil {
lnk.Err = err
break
}
switch d.Type() {
case ft.TFile, ft.TRaw:
lnk.Type = coreiface.TFile
case ft.THAMTShard, ft.TDirectory, ft.TMetadata:
lnk.Type = coreiface.TDirectory
case ft.TSymlink:
lnk.Type = coreiface.TSymlink
lnk.Target = string(d.Data())
}
if !settings.UseCumulativeSize {
lnk.Size = d.FileSize()
}
}
lnk.Size = d.FileSize()
}

if settings.UseCumulativeSize {
lnk.Size = linkres.Link.Size
}
}

Expand Down
10 changes: 4 additions & 6 deletions core/corehttp/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ import (
)

type GatewayConfig struct {
Headers map[string][]string
Writable bool
FastDirIndexThreshold int
Headers map[string][]string
Writable bool
}

// NodeAPI defines the minimal set of API services required by a gateway handler
Expand Down Expand Up @@ -83,9 +82,8 @@ func GatewayOption(writable bool, paths ...string) ServeOption {
}

gateway := NewGatewayHandler(GatewayConfig{
Headers: headers,
Writable: writable,
FastDirIndexThreshold: int(cfg.Gateway.FastDirIndexThreshold.WithDefault(100)),
Headers: headers,
Writable: writable,
}, api, offlineAPI)

gateway = otelhttp.NewHandler(gateway, "Gateway.Request")
Expand Down
48 changes: 18 additions & 30 deletions core/corehttp/gateway_handler_unixfs_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,25 +105,29 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
return
}

// Optimization 1:
// List children without fetching their root blocks (fast, but no size info)
results, err := i.api.Unixfs().Ls(ctx, resolvedPath, options.Unixfs.ResolveChildren(false))
// Optimization: use Unixfs.Ls without resolving children, but using the
// cumulative DAG size as the file size. This allows for a fast listing
// while keeping a good enough Size field.
results, err := i.api.Unixfs().Ls(ctx,
resolvedPath,
options.Unixfs.ResolveChildren(false),
options.Unixfs.UseCumulativeSize(true),
)
if err != nil {
internalWebError(w, err)
return
}

// storage for directory listing
dirListing := make([]directoryItem, 0, len(results))

for link := range results {
if link.Err != nil {
internalWebError(w, err)
return
}

hash := link.Cid.String()
di := directoryItem{
Size: "", // no size because we did not fetch child nodes
Size: humanize.Bytes(uint64(link.Size)),
Name: link.Name,
Path: gopath.Join(originalURLPath, link.Name),
Hash: hash,
Expand All @@ -132,21 +136,6 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
dirListing = append(dirListing, di)
}

// Optimization 2: fetch sizes only for dirs below FastDirIndexThreshold
if len(dirListing) < i.config.FastDirIndexThreshold {
dirit := dir.Entries()
linkNo := 0
for dirit.Next() {
size := "?"
if s, err := dirit.Node().Size(); err == nil {
// Size may not be defined/supported. Continue anyways.
size = humanize.Bytes(uint64(s))
}
dirListing[linkNo].Size = size
linkNo++
}
}

// construct the correct back link
// https://github.com/ipfs/kubo/issues/1365
backLink := originalURLPath
Expand Down Expand Up @@ -195,15 +184,14 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit

// See comment above where originalUrlPath is declared.
tplData := listingTemplateData{
GatewayURL: gwURL,
DNSLink: dnslink,
Listing: dirListing,
Size: size,
Path: contentPath.String(),
Breadcrumbs: breadcrumbs(contentPath.String(), dnslink),
BackLink: backLink,
Hash: hash,
FastDirIndexThreshold: i.config.FastDirIndexThreshold,
GatewayURL: gwURL,
DNSLink: dnslink,
Listing: dirListing,
Size: size,
Path: contentPath.String(),
Breadcrumbs: breadcrumbs(contentPath.String(), dnslink),
BackLink: backLink,
Hash: hash,
}

logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash)
Expand Down
17 changes: 8 additions & 9 deletions core/corehttp/gateway_indexPage.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ import (

// structs for directory listing
type listingTemplateData struct {
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
FastDirIndexThreshold int
GatewayURL string
DNSLink bool
Listing []directoryItem
Size string
Path string
Breadcrumbs []breadcrumb
BackLink string
Hash string
}

type directoryItem struct {
Expand Down
11 changes: 11 additions & 0 deletions docs/changelogs/v0.18.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Below is an outline of all that is in this release, so you get a sense of all th
- [Overview](#overview)
- [🔦 Highlights](#-highlights)
- [(DAG-)JSON and (DAG-)CBOR Response Formats on Gateways](#dag-json-and-dag-cbor-response-formats-on-gateways)
- [🐎 Fast directory listings with DAG sizes](#-fast-directory-listings-with-dag-sizes)
- [Content Routing](#content-routing)
- [Provider Record Republish and Expiration](#provider-record-republish-and-expiration)
- [Lowered `ConnMgr`](#lowered-connmgr)
Expand Down Expand Up @@ -71,6 +72,16 @@ $ curl "http://127.0.0.1:8080/ipfs/$DIR_CID?format=dag-json" | jq
}
```

#### 🐎 Fast directory listings with DAG sizes

Fast listings are now enabled for _all_ UnixFS directories: big and small.
There is no linear slowdown caused by reading size metadata from child nodes,
and the size of the DAG representing each child item is always present.

As an example, the CID
`bafybeiggvykl7skb2ndlmacg2k5modvudocffxjesexlod2pfvg5yhwrqm` represents a UnixFS
directory with over 10k (10100) files. Listing big directories has been fast
since Kubo 0.13, but as of this release the listing also includes the size column.

#### Content Routing

Expand Down
12 changes: 1 addition & 11 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -672,17 +672,7 @@ Type: `string` (url)

### `Gateway.FastDirIndexThreshold`

The maximum number of items in a directory before the Gateway switches
to a shallow, faster listing which only requires the root node.

This allows for fast listings of big directories, without the linear slowdown caused
by reading size metadata from child nodes.

Setting to 0 will enable fast listings for all directories.

Default: `100`

Type: `optionalInteger`
**REMOVED**: this option is [no longer necessary](https://github.com/ipfs/kubo/pull/9481). Ignored since [Kubo 0.18](https://github.com/ipfs/kubo/blob/master/docs/changelogs/v0.18.md).

### `Gateway.Writable`

Expand Down
2 changes: 1 addition & 1 deletion docs/examples/kubo-as-a-library/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ replace github.com/ipfs/kubo => ./../../..

require (
github.com/ipfs/go-ipfs-files v0.2.0
github.com/ipfs/interface-go-ipfs-core v0.8.0
github.com/ipfs/interface-go-ipfs-core v0.8.1
github.com/ipfs/kubo v0.14.0-rc1
github.com/libp2p/go-libp2p v0.24.1
github.com/multiformats/go-multiaddr v0.8.0
Expand Down
4 changes: 2 additions & 2 deletions docs/examples/kubo-as-a-library/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -598,8 +598,8 @@ github.com/ipfs/go-unixfsnode v1.4.0/go.mod h1:qc7YFFZ8tABc58p62HnIYbUMwj9chhUuF
github.com/ipfs/go-verifcid v0.0.1/go.mod h1:5Hrva5KBeIog4A+UpqlaIU+DEstipcJYQQZc0g37pY0=
github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs=
github.com/ipfs/go-verifcid v0.0.2/go.mod h1:40cD9x1y4OWnFXbLNJYRe7MpNvWlMn3LZAG5Wb4xnPU=
github.com/ipfs/interface-go-ipfs-core v0.8.0 h1:pNs34l947fvNOh+XEjXnHW/GV6HXmEzJNeqZFhX4GoQ=
github.com/ipfs/interface-go-ipfs-core v0.8.0/go.mod h1:WYC2H6Mu7aGqhlupi/CVawcs0X1Me4uRvV0rcTlo3zM=
github.com/ipfs/interface-go-ipfs-core v0.8.1 h1:nuFG0YJ429Wd5gtRb3ivlblpknZ5VfDVKZkmOG2TnNQ=
github.com/ipfs/interface-go-ipfs-core v0.8.1/go.mod h1:WYC2H6Mu7aGqhlupi/CVawcs0X1Me4uRvV0rcTlo3zM=
github.com/ipld/edelweiss v0.2.0 h1:KfAZBP8eeJtrLxLhi7r3N0cBCo7JmwSRhOJp3WSpNjk=
github.com/ipld/edelweiss v0.2.0/go.mod h1:FJAzJRCep4iI8FOFlRriN9n0b7OuX3T/S9++NpBDmA4=
github.com/ipld/go-car v0.4.0 h1:U6W7F1aKF/OJMHovnOVdst2cpQE5GhmHibQkAixgNcQ=
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ require (
github.com/ipfs/go-unixfs v0.4.1
github.com/ipfs/go-unixfsnode v1.4.0
github.com/ipfs/go-verifcid v0.0.2
github.com/ipfs/interface-go-ipfs-core v0.8.0
github.com/ipfs/interface-go-ipfs-core v0.8.1
github.com/ipld/go-car v0.4.0
github.com/ipld/go-car/v2 v2.4.0
github.com/ipld/go-codec-dagpb v1.4.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -625,8 +625,8 @@ github.com/ipfs/go-unixfsnode v1.4.0/go.mod h1:qc7YFFZ8tABc58p62HnIYbUMwj9chhUuF
github.com/ipfs/go-verifcid v0.0.1/go.mod h1:5Hrva5KBeIog4A+UpqlaIU+DEstipcJYQQZc0g37pY0=
github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs=
github.com/ipfs/go-verifcid v0.0.2/go.mod h1:40cD9x1y4OWnFXbLNJYRe7MpNvWlMn3LZAG5Wb4xnPU=
github.com/ipfs/interface-go-ipfs-core v0.8.0 h1:pNs34l947fvNOh+XEjXnHW/GV6HXmEzJNeqZFhX4GoQ=
github.com/ipfs/interface-go-ipfs-core v0.8.0/go.mod h1:WYC2H6Mu7aGqhlupi/CVawcs0X1Me4uRvV0rcTlo3zM=
github.com/ipfs/interface-go-ipfs-core v0.8.1 h1:nuFG0YJ429Wd5gtRb3ivlblpknZ5VfDVKZkmOG2TnNQ=
github.com/ipfs/interface-go-ipfs-core v0.8.1/go.mod h1:WYC2H6Mu7aGqhlupi/CVawcs0X1Me4uRvV0rcTlo3zM=
github.com/ipld/edelweiss v0.2.0 h1:KfAZBP8eeJtrLxLhi7r3N0cBCo7JmwSRhOJp3WSpNjk=
github.com/ipld/edelweiss v0.2.0/go.mod h1:FJAzJRCep4iI8FOFlRriN9n0b7OuX3T/S9++NpBDmA4=
github.com/ipld/go-car v0.4.0 h1:U6W7F1aKF/OJMHovnOVdst2cpQE5GhmHibQkAixgNcQ=
Expand Down
22 changes: 0 additions & 22 deletions test/sharness/t0115-gateway-dir-listing.sh
Original file line number Diff line number Diff line change
Expand Up @@ -163,28 +163,6 @@ test_expect_success "dnslink gw: hash column should be a CID link to cid.ipfs.te
test_should_contain "<a class=\"ipfs-hash\" translate=\"no\" href=\"https://cid.ipfs.tech/#$FILE_CID\" target=\"_blank\" rel=\"noreferrer noopener\">" list_response
'

## ============================================================================
## Test dir listing of a big directory
## ============================================================================

test_expect_success "dir listing should resolve child sizes if under Gateway.FastDirIndexThreshold" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response &&
test_should_contain "/ipfs/${FILE_CID}?filename" list_response &&
test_should_contain ">${FILE_SIZE} B</td>" list_response
'

# force fast dir index for all responses
ipfs config --json Gateway.FastDirIndexThreshold 0
# restart daemon to apply config changes
test_kill_ipfs_daemon
test_launch_ipfs_daemon

test_expect_success "dir listing should not resolve child sizes beyond Gateway.FastDirIndexThreshold" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response &&
test_should_contain "/ipfs/${FILE_CID}?filename" list_response &&
test_should_not_contain ">${FILE_SIZE} B</td>" list_response
'

## ============================================================================
## End of tests, cleanup
## ============================================================================
Expand Down

0 comments on commit 7bdb341

Please sign in to comment.