From 233ff4315e5cb43457425b0f545454800420fc8d Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 8 Dec 2022 14:07:39 +0100 Subject: [PATCH] refactor: use Dag Size instead of UnixFS Size in Listing --- assets/dir-index-html/dir-index.html | 7 +- assets/dir-index-html/src/dir-index.html | 7 +- assets/dir-index-html/test/main.go | 17 ++-- config/gateway.go | 8 -- core/corehttp/gateway.go | 10 +-- core/corehttp/gateway_handler_unixfs_dir.go | 89 ++++++++++++--------- core/corehttp/gateway_indexPage.go | 17 ++-- docs/config.md | 15 ---- test/sharness/t0115-gateway-dir-listing.sh | 22 ----- 9 files changed, 82 insertions(+), 110 deletions(-) diff --git a/assets/dir-index-html/dir-index.html b/assets/dir-index-html/dir-index.html index 1d00e5fe73f..9d02d3849aa 100644 --- a/assets/dir-index-html/dir-index.html +++ b/assets/dir-index-html/dir-index.html @@ -53,10 +53,13 @@ {{ .Hash }} {{ end }} +

+ Preview as JSON. Download as: CAR, TAR, DAG-JSON, DAG-CBOR. +

{{ if .Size }}
-  {{ .Size }} +  {{ .Size }}
{{ end }} @@ -89,7 +92,7 @@ {{ end }} - {{ .Size }} + {{ .Size }} {{ end }} diff --git a/assets/dir-index-html/src/dir-index.html b/assets/dir-index-html/src/dir-index.html index a763b3e76bb..ed66e522315 100644 --- a/assets/dir-index-html/src/dir-index.html +++ b/assets/dir-index-html/src/dir-index.html @@ -52,10 +52,13 @@ {{ .Hash }} {{ end }} +

+ Preview as JSON. Download as: CAR, TAR, DAG-JSON, DAG-CBOR. +

{{ if .Size }}
-  {{ .Size }} +  {{ .Size }}
{{ end }} @@ -88,7 +91,7 @@ {{ end }} - {{ .Size }} + {{ .Size }} {{ end }} diff --git a/assets/dir-index-html/test/main.go b/assets/dir-index-html/test/main.go index 43b4a098101..c02523a9f40 100644 --- a/assets/dir-index-html/test/main.go +++ b/assets/dir-index-html/test/main.go @@ -12,15 +12,14 @@ const templateFile = "../dir-index.html" // Copied from go-ipfs/core/corehttp/gateway_indexPage.go type listingTemplateData struct { - GatewayURL string - DNSLink bool - Listing []directoryItem - Size string - Path string - Breadcrumbs []breadcrumb - BackLink string - Hash string - FastDirIndexThreshold int + GatewayURL string + DNSLink bool + Listing []directoryItem + Size string + Path string + Breadcrumbs []breadcrumb + BackLink string + Hash string } type directoryItem struct { diff --git a/config/gateway.go b/config/gateway.go index 8b8c65d1db5..ad01b263b36 100644 --- a/config/gateway.go +++ b/config/gateway.go @@ -45,14 +45,6 @@ type Gateway struct { // PathPrefixes was removed: https://github.com/ipfs/go-ipfs/issues/7702 PathPrefixes []string - // FastDirIndexThreshold is the maximum number of items in a directory - // before the Gateway switches to a shallow, faster listing which only - // requires the root node. This allows for listing big directories fast, - // without the linear slowdown caused by reading size metadata from child - // nodes. - // Setting to 0 will enable fast listings for all directories. - FastDirIndexThreshold *OptionalInteger `json:",omitempty"` - // FIXME: Not yet implemented: https://github.com/ipfs/kubo/issues/8059 APICommands []string diff --git a/core/corehttp/gateway.go b/core/corehttp/gateway.go index 0d0a234d946..334000b5ab3 100644 --- a/core/corehttp/gateway.go +++ b/core/corehttp/gateway.go @@ -18,9 +18,8 @@ import ( ) type GatewayConfig struct { - Headers map[string][]string - Writable bool - FastDirIndexThreshold int + Headers map[string][]string + Writable bool } // NodeAPI defines the minimal set of API services required by a gateway handler @@ -83,9 +82,8 @@ func GatewayOption(writable bool, paths ...string) ServeOption { } gateway := NewGatewayHandler(GatewayConfig{ - Headers: headers, - Writable: writable, - FastDirIndexThreshold: int(cfg.Gateway.FastDirIndexThreshold.WithDefault(100)), + Headers: headers, + Writable: writable, }, api, offlineAPI) gateway = otelhttp.NewHandler(gateway, "Gateway.Request") diff --git a/core/corehttp/gateway_handler_unixfs_dir.go b/core/corehttp/gateway_handler_unixfs_dir.go index 1c803b13b34..94474b4c222 100644 --- a/core/corehttp/gateway_handler_unixfs_dir.go +++ b/core/corehttp/gateway_handler_unixfs_dir.go @@ -11,9 +11,12 @@ import ( "github.com/dustin/go-humanize" cid "github.com/ipfs/go-cid" files "github.com/ipfs/go-ipfs-files" + format "github.com/ipfs/go-ipld-format" + merkledag "github.com/ipfs/go-merkledag" path "github.com/ipfs/go-path" "github.com/ipfs/go-path/resolver" - options "github.com/ipfs/interface-go-ipfs-core/options" + "github.com/ipfs/go-unixfs" + "github.com/ipfs/go-unixfs/hamt" ipath "github.com/ipfs/interface-go-ipfs-core/path" "github.com/ipfs/kubo/assets" "github.com/ipfs/kubo/tracing" @@ -105,25 +108,20 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit return } - // Optimization 1: - // List children without fetching their root blocks (fast, but no size info) - results, err := i.api.Unixfs().Ls(ctx, resolvedPath, options.Unixfs.ResolveChildren(false)) + // Optimization: use Dag.Get to fetch the children links of this directory + // instead of UnixFS.LS. Dag.Get is faster and also provides a Size field + // that is good enough for a directory listing. + links, err := i.getUnixFsLinks(ctx, resolvedPath.Cid()) if err != nil { internalWebError(w, err) return } - // storage for directory listing - dirListing := make([]directoryItem, 0, len(results)) - - for link := range results { - if link.Err != nil { - internalWebError(w, err) - return - } + dirListing := make([]directoryItem, 0, len(links)) + for _, link := range links { hash := link.Cid.String() di := directoryItem{ - Size: "", // no size because we did not fetch child nodes + Size: humanize.Bytes(uint64(link.Size)), Name: link.Name, Path: gopath.Join(originalURLPath, link.Name), Hash: hash, @@ -132,21 +130,6 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit dirListing = append(dirListing, di) } - // Optimization 2: fetch sizes only for dirs below FastDirIndexThreshold - if len(dirListing) < i.config.FastDirIndexThreshold { - dirit := dir.Entries() - linkNo := 0 - for dirit.Next() { - size := "?" - if s, err := dirit.Node().Size(); err == nil { - // Size may not be defined/supported. Continue anyways. - size = humanize.Bytes(uint64(s)) - } - dirListing[linkNo].Size = size - linkNo++ - } - } - // construct the correct back link // https://github.com/ipfs/kubo/issues/1365 backLink := originalURLPath @@ -195,15 +178,14 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit // See comment above where originalUrlPath is declared. tplData := listingTemplateData{ - GatewayURL: gwURL, - DNSLink: dnslink, - Listing: dirListing, - Size: size, - Path: contentPath.String(), - Breadcrumbs: breadcrumbs(contentPath.String(), dnslink), - BackLink: backLink, - Hash: hash, - FastDirIndexThreshold: i.config.FastDirIndexThreshold, + GatewayURL: gwURL, + DNSLink: dnslink, + Listing: dirListing, + Size: size, + Path: contentPath.String(), + Breadcrumbs: breadcrumbs(contentPath.String(), dnslink), + BackLink: backLink, + Hash: hash, } logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash) @@ -220,3 +202,36 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit func getDirListingEtag(dirCid cid.Cid) string { return `"DirIndex-` + assets.AssetHash + `_CID-` + dirCid.String() + `"` } + +func (i *gatewayHandler) getUnixFsLinks(ctx context.Context, cid cid.Cid) ([]*format.Link, error) { + obj, err := i.api.Dag().Get(ctx, cid) + if err != nil { + return nil, err + } + + protoNode, ok := obj.(*merkledag.ProtoNode) + if !ok { + return obj.Links(), nil + } + + fsNode, err := unixfs.FSNodeFromBytes(protoNode.Data()) + if err != nil { + return nil, err + } + + if fsNode.Type() == unixfs.THAMTShard { + shard, err := hamt.NewHamtFromDag(i.api.Dag(), obj) + if err != nil { + return nil, err + } + + links, err := shard.EnumLinks(ctx) + if err != nil { + return nil, err + } + + return links, nil + } else { + return obj.Links(), nil + } +} diff --git a/core/corehttp/gateway_indexPage.go b/core/corehttp/gateway_indexPage.go index 19e444da3f0..b0db8ac1a18 100644 --- a/core/corehttp/gateway_indexPage.go +++ b/core/corehttp/gateway_indexPage.go @@ -12,15 +12,14 @@ import ( // structs for directory listing type listingTemplateData struct { - GatewayURL string - DNSLink bool - Listing []directoryItem - Size string - Path string - Breadcrumbs []breadcrumb - BackLink string - Hash string - FastDirIndexThreshold int + GatewayURL string + DNSLink bool + Listing []directoryItem + Size string + Path string + Breadcrumbs []breadcrumb + BackLink string + Hash string } type directoryItem struct { diff --git a/docs/config.md b/docs/config.md index 2aba83291fa..89ab0630f25 100644 --- a/docs/config.md +++ b/docs/config.md @@ -52,7 +52,6 @@ config file at runtime. - [`Gateway.NoDNSLink`](#gatewaynodnslink) - [`Gateway.HTTPHeaders`](#gatewayhttpheaders) - [`Gateway.RootRedirect`](#gatewayrootredirect) - - [`Gateway.FastDirIndexThreshold`](#gatewayfastdirindexthreshold) - [`Gateway.Writable`](#gatewaywritable) - [`Gateway.PathPrefixes`](#gatewaypathprefixes) - [`Gateway.PublicGateways`](#gatewaypublicgateways) @@ -674,20 +673,6 @@ Default: `""` Type: `string` (url) -### `Gateway.FastDirIndexThreshold` - -The maximum number of items in a directory before the Gateway switches -to a shallow, faster listing which only requires the root node. - -This allows for fast listings of big directories, without the linear slowdown caused -by reading size metadata from child nodes. - -Setting to 0 will enable fast listings for all directories. - -Default: `100` - -Type: `optionalInteger` - ### `Gateway.Writable` A boolean to configure whether the gateway is writeable or not. diff --git a/test/sharness/t0115-gateway-dir-listing.sh b/test/sharness/t0115-gateway-dir-listing.sh index 4b8cf7bc248..708e0c4cf8b 100755 --- a/test/sharness/t0115-gateway-dir-listing.sh +++ b/test/sharness/t0115-gateway-dir-listing.sh @@ -163,28 +163,6 @@ test_expect_success "dnslink gw: hash column should be a CID link to cid.ipfs.te test_should_contain "" list_response ' -## ============================================================================ -## Test dir listing of a big directory -## ============================================================================ - -test_expect_success "dir listing should resolve child sizes if under Gateway.FastDirIndexThreshold" ' - curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response && - test_should_contain "/ipfs/${FILE_CID}?filename" list_response && - test_should_contain ">${FILE_SIZE} B" list_response -' - -# force fast dir index for all responses -ipfs config --json Gateway.FastDirIndexThreshold 0 -# restart daemon to apply config changes -test_kill_ipfs_daemon -test_launch_ipfs_daemon - -test_expect_success "dir listing should not resolve child sizes beyond Gateway.FastDirIndexThreshold" ' - curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ | tee list_response && - test_should_contain "/ipfs/${FILE_CID}?filename" list_response && - test_should_not_contain ">${FILE_SIZE} B" list_response -' - ## ============================================================================ ## End of tests, cleanup ## ============================================================================