From 59b3312f61cf08989219c70f0219fa5ba82915cd Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Fri, 23 Sep 2022 11:44:19 -0500 Subject: [PATCH] feat(gateway): _redirects file support (#8890) https://github.com/ipfs/kubo/pull/8890 https://github.com/ipfs/specs/pull/290 This commit was moved from ipfs/kubo@bcaacdd6c3168392c2e2673e5a35e8a8387edbbc --- gateway/core/corehttp/gateway_handler.go | 136 +++------ .../gateway_handler_unixfs__redirects.go | 287 ++++++++++++++++++ .../corehttp/gateway_handler_unixfs_dir.go | 2 +- gateway/core/corehttp/hostname.go | 7 +- 4 files changed, 340 insertions(+), 92 deletions(-) create mode 100644 gateway/core/corehttp/gateway_handler_unixfs__redirects.go diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index ca3867001..585ca89cb 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -13,7 +13,6 @@ import ( gopath "path" "regexp" "runtime/debug" - "strconv" "strings" "time" @@ -378,23 +377,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } - // Resolve path to the final DAG node for the ETag - resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) - switch err { - case nil: - case coreiface.ErrOffline: - webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) - return - default: - // if Accept is text/html, see if ipfs-404.html is present - if i.servePretty404IfPresent(w, r, contentPath) { - logger.Debugw("serve pretty 404 if present") - return - } - webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusBadRequest) - return - } - // Detect when explicit Accept header or ?format parameter are present responseFormat, formatParams, err := customResponseFormat(r) if err != nil { @@ -402,6 +384,11 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat)) + + resolvedPath, contentPath, ok := i.handlePathResolution(w, r, responseFormat, contentPath, logger) + if !ok { + return + } trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String())) // Detect when If-None-Match HTTP header allows returning HTTP 304 Not Modified @@ -450,36 +437,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } } -func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { - resolved404Path, ctype, err := i.searchUpTreeFor404(r, contentPath) - if err != nil { - return false - } - - dr, err := i.api.Unixfs().Get(r.Context(), resolved404Path) - if err != nil { - return false - } - defer dr.Close() - - f, ok := dr.(files.File) - if !ok { - return false - } - - size, err := f.Size() - if err != nil { - return false - } - - log.Debugw("using pretty 404 file", "path", contentPath) - w.Header().Set("Content-Type", ctype) - w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) - w.WriteHeader(http.StatusNotFound) - _, err = io.CopyN(w, f, size) - return err == nil -} - func (i *gatewayHandler) postHandler(w http.ResponseWriter, r *http.Request) { p, err := i.api.Unixfs().Add(r.Context(), files.NewReaderFile(r.Body)) if err != nil { @@ -920,55 +877,56 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] return "", nil, nil } -func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, contentPath ipath.Path) (ipath.Resolved, string, error) { - filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) - if err != nil { - return nil, "", err +// returns unquoted path with all special characters revealed as \u codes +func debugStr(path string) string { + q := fmt.Sprintf("%+q", path) + if len(q) >= 3 { + q = q[1 : len(q)-1] } + return q +} - pathComponents := strings.Split(contentPath.String(), "/") - - for idx := len(pathComponents); idx >= 3; idx-- { - pretty404 := gopath.Join(append(pathComponents[0:idx], filename404)...) - parsed404Path := ipath.New("/" + pretty404) - if parsed404Path.IsValid() != nil { - break - } - resolvedPath, err := i.api.ResolvePath(r.Context(), parsed404Path) - if err != nil { - continue - } - return resolvedPath, ctype, nil - } +// Resolve the provided contentPath including any special handling related to +// the requested responseFormat. Returned ok flag indicates if gateway handler +// should continue processing the request. +func (i *gatewayHandler) handlePathResolution(w http.ResponseWriter, r *http.Request, responseFormat string, contentPath ipath.Path, logger *zap.SugaredLogger) (resolvedPath ipath.Resolved, newContentPath ipath.Path, ok bool) { + // Attempt to resolve the provided path. + resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) - return nil, "", fmt.Errorf("no pretty 404 in any parent folder") -} + switch err { + case nil: + return resolvedPath, contentPath, true + case coreiface.ErrOffline: + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) + return nil, nil, false + default: + // The path can't be resolved. + if isUnixfsResponseFormat(responseFormat) { + // If we have origin isolation (subdomain gw, DNSLink website), + // and response type is UnixFS (default for website hosting) + // check for presence of _redirects file and apply rules defined there. + // See: https://github.com/ipfs/specs/pull/290 + if hasOriginIsolation(r) { + resolvedPath, newContentPath, ok, hadMatchingRule := i.serveRedirectsIfPresent(w, r, resolvedPath, contentPath, logger) + if hadMatchingRule { + logger.Debugw("applied a rule from _redirects file") + return resolvedPath, newContentPath, ok + } + } -func preferred404Filename(acceptHeaders []string) (string, string, error) { - // If we ever want to offer a 404 file for a different content type - // then this function will need to parse q weightings, but for now - // the presence of anything matching HTML is enough. - for _, acceptHeader := range acceptHeaders { - accepted := strings.Split(acceptHeader, ",") - for _, spec := range accepted { - contentType := strings.SplitN(spec, ";", 1)[0] - switch contentType { - case "*/*", "text/*", "text/html": - return "ipfs-404.html", "text/html", nil + // if Accept is text/html, see if ipfs-404.html is present + // This logic isn't documented and will likely be removed at some point. + // Any 404 logic in _redirects above will have already run by this time, so it's really an extra fall back + if i.serveLegacy404IfPresent(w, r, contentPath) { + logger.Debugw("served legacy 404") + return nil, nil, false } } - } - return "", "", fmt.Errorf("there is no 404 file for the requested content types") -} - -// returns unquoted path with all special characters revealed as \u codes -func debugStr(path string) string { - q := fmt.Sprintf("%+q", path) - if len(q) >= 3 { - q = q[1 : len(q)-1] + // Note: webError will replace http.StatusBadRequest with StatusNotFound if necessary + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusBadRequest) + return nil, nil, false } - return q } // Detect 'Cache-Control: only-if-cached' in request and return data if it is already in the local datastore. diff --git a/gateway/core/corehttp/gateway_handler_unixfs__redirects.go b/gateway/core/corehttp/gateway_handler_unixfs__redirects.go new file mode 100644 index 000000000..81cf05731 --- /dev/null +++ b/gateway/core/corehttp/gateway_handler_unixfs__redirects.go @@ -0,0 +1,287 @@ +package corehttp + +import ( + "fmt" + "io" + "net/http" + gopath "path" + "strconv" + "strings" + + files "github.com/ipfs/go-ipfs-files" + redirects "github.com/ipfs/go-ipfs-redirects-file" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + "go.uber.org/zap" +) + +// Resolving a UnixFS path involves determining if the provided `path.Path` exists and returning the `path.Resolved` +// corresponding to that path. For UnixFS, path resolution is more involved. +// +// When a path under requested CID does not exist, Gateway will check if a `_redirects` file exists +// underneath the root CID of the path, and apply rules defined there. +// See sepcification introduced in: https://github.com/ipfs/specs/pull/290 +// +// Scenario 1: +// If a path exists, we always return the `path.Resolved` corresponding to that path, regardless of the existence of a `_redirects` file. +// +// Scenario 2: +// If a path does not exist, usually we should return a `nil` resolution path and an error indicating that the path +// doesn't exist. However, a `_redirects` file may exist and contain a redirect rule that redirects that path to a different path. +// We need to evaluate the rule and perform the redirect if present. +// +// Scenario 3: +// Another possibility is that the path corresponds to a rewrite rule (i.e. a rule with a status of 200). +// In this case, we don't perform a redirect, but do need to return a `path.Resolved` and `path.Path` corresponding to +// the rewrite destination path. +// +// Note that for security reasons, redirect rules are only processed when the request has origin isolation. +// See https://github.com/ipfs/specs/pull/290 for more information. +func (i *gatewayHandler) serveRedirectsIfPresent(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, logger *zap.SugaredLogger) (newResolvedPath ipath.Resolved, newContentPath ipath.Path, continueProcessing bool, hadMatchingRule bool) { + redirectsFile := i.getRedirectsFile(r, contentPath, logger) + if redirectsFile != nil { + redirectRules, err := i.getRedirectRules(r, redirectsFile) + if err != nil { + internalWebError(w, err) + return nil, nil, false, true + } + + redirected, newPath, err := i.handleRedirectsFileRules(w, r, contentPath, redirectRules) + if err != nil { + err = fmt.Errorf("trouble processing _redirects file at %q: %w", redirectsFile.String(), err) + internalWebError(w, err) + return nil, nil, false, true + } + + if redirected { + return nil, nil, false, true + } + + // 200 is treated as a rewrite, so update the path and continue + if newPath != "" { + // Reassign contentPath and resolvedPath since the URL was rewritten + contentPath = ipath.New(newPath) + resolvedPath, err = i.api.ResolvePath(r.Context(), contentPath) + if err != nil { + internalWebError(w, err) + return nil, nil, false, true + } + + return resolvedPath, contentPath, true, true + } + } + // No matching rule, paths remain the same, continue regular processing + return resolvedPath, contentPath, true, false +} + +func (i *gatewayHandler) handleRedirectsFileRules(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, redirectRules []redirects.Rule) (redirected bool, newContentPath string, err error) { + // Attempt to match a rule to the URL path, and perform the corresponding redirect or rewrite + pathParts := strings.Split(contentPath.String(), "/") + if len(pathParts) > 3 { + // All paths should start with /ipfs/cid/, so get the path after that + urlPath := "/" + strings.Join(pathParts[3:], "/") + rootPath := strings.Join(pathParts[:3], "/") + // Trim off the trailing / + urlPath = strings.TrimSuffix(urlPath, "/") + + for _, rule := range redirectRules { + // Error right away if the rule is invalid + if !rule.MatchAndExpandPlaceholders(urlPath) { + continue + } + + // We have a match! + + // Rewrite + if rule.Status == 200 { + // Prepend the rootPath + toPath := rootPath + rule.To + return false, toPath, nil + } + + // Or 4xx + if rule.Status == 404 || rule.Status == 410 || rule.Status == 451 { + toPath := rootPath + rule.To + content4xxPath := ipath.New(toPath) + err := i.serve4xx(w, r, content4xxPath, rule.Status) + return true, toPath, err + } + + // Or redirect + if rule.Status >= 301 && rule.Status <= 308 { + http.Redirect(w, r, rule.To, rule.Status) + return true, "", nil + } + } + } + + // No redirects matched + return false, "", nil +} + +func (i *gatewayHandler) getRedirectRules(r *http.Request, redirectsFilePath ipath.Resolved) ([]redirects.Rule, error) { + // Convert the path into a file node + node, err := i.api.Unixfs().Get(r.Context(), redirectsFilePath) + if err != nil { + return nil, fmt.Errorf("could not get _redirects: %w", err) + } + defer node.Close() + + // Convert the node into a file + f, ok := node.(files.File) + if !ok { + return nil, fmt.Errorf("could not parse _redirects: %w", err) + } + + // Parse redirect rules from file + redirectRules, err := redirects.Parse(f) + if err != nil { + return nil, fmt.Errorf("could not parse _redirects: %w", err) + } + + return redirectRules, nil +} + +// Returns a resolved path to the _redirects file located in the root CID path of the requested path +func (i *gatewayHandler) getRedirectsFile(r *http.Request, contentPath ipath.Path, logger *zap.SugaredLogger) ipath.Resolved { + // contentPath is the full ipfs path to the requested resource, + // regardless of whether path or subdomain resolution is used. + rootPath := getRootPath(contentPath) + + // Check for _redirects file. + // Any path resolution failures are ignored and we just assume there's no _redirects file. + // Note that ignoring these errors also ensures that the use of the empty CID (bafkqaaa) in tests doesn't fail. + path := ipath.Join(rootPath, "_redirects") + resolvedPath, err := i.api.ResolvePath(r.Context(), path) + if err != nil { + return nil + } + return resolvedPath +} + +// Returns the root CID Path for the given path +func getRootPath(path ipath.Path) ipath.Path { + parts := strings.Split(path.String(), "/") + return ipath.New(gopath.Join("/", path.Namespace(), parts[2])) +} + +func (i *gatewayHandler) serve4xx(w http.ResponseWriter, r *http.Request, content4xxPath ipath.Path, status int) error { + resolved4xxPath, err := i.api.ResolvePath(r.Context(), content4xxPath) + if err != nil { + return err + } + + node, err := i.api.Unixfs().Get(r.Context(), resolved4xxPath) + if err != nil { + return err + } + defer node.Close() + + f, ok := node.(files.File) + if !ok { + return fmt.Errorf("could not convert node for %d page to file", status) + } + + size, err := f.Size() + if err != nil { + return fmt.Errorf("could not get size of %d page", status) + } + + log.Debugf("using _redirects: custom %d file at %q", status, content4xxPath) + w.Header().Set("Content-Type", "text/html") + w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) + addCacheControlHeaders(w, r, content4xxPath, resolved4xxPath.Cid()) + w.WriteHeader(status) + _, err = io.CopyN(w, f, size) + return err +} + +func hasOriginIsolation(r *http.Request) bool { + _, gw := r.Context().Value(requestContextKey("gw-hostname")).(string) + _, dnslink := r.Context().Value("dnslink-hostname").(string) + + if gw || dnslink { + return true + } + + return false +} + +func isUnixfsResponseFormat(responseFormat string) bool { + // The implicit response format is UnixFS + return responseFormat == "" +} + +// Deprecated: legacy ipfs-404.html files are superseded by _redirects file +// This is provided only for backward-compatibility, until websites migrate +// to 404s managed via _redirects file (https://github.com/ipfs/specs/pull/290) +func (i *gatewayHandler) serveLegacy404IfPresent(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { + resolved404Path, ctype, err := i.searchUpTreeFor404(r, contentPath) + if err != nil { + return false + } + + dr, err := i.api.Unixfs().Get(r.Context(), resolved404Path) + if err != nil { + return false + } + defer dr.Close() + + f, ok := dr.(files.File) + if !ok { + return false + } + + size, err := f.Size() + if err != nil { + return false + } + + log.Debugw("using pretty 404 file", "path", contentPath) + w.Header().Set("Content-Type", ctype) + w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) + w.WriteHeader(http.StatusNotFound) + _, err = io.CopyN(w, f, size) + return err == nil +} + +func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, contentPath ipath.Path) (ipath.Resolved, string, error) { + filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) + if err != nil { + return nil, "", err + } + + pathComponents := strings.Split(contentPath.String(), "/") + + for idx := len(pathComponents); idx >= 3; idx-- { + pretty404 := gopath.Join(append(pathComponents[0:idx], filename404)...) + parsed404Path := ipath.New("/" + pretty404) + if parsed404Path.IsValid() != nil { + break + } + resolvedPath, err := i.api.ResolvePath(r.Context(), parsed404Path) + if err != nil { + continue + } + return resolvedPath, ctype, nil + } + + return nil, "", fmt.Errorf("no pretty 404 in any parent folder") +} + +func preferred404Filename(acceptHeaders []string) (string, string, error) { + // If we ever want to offer a 404 file for a different content type + // then this function will need to parse q weightings, but for now + // the presence of anything matching HTML is enough. + for _, acceptHeader := range acceptHeaders { + accepted := strings.Split(acceptHeader, ",") + for _, spec := range accepted { + contentType := strings.SplitN(spec, ";", 1)[0] + switch contentType { + case "*/*", "text/*", "text/html": + return "ipfs-404.html", "text/html", nil + } + } + } + + return "", "", fmt.Errorf("there is no 404 file for the requested content types") +} diff --git a/gateway/core/corehttp/gateway_handler_unixfs_dir.go b/gateway/core/corehttp/gateway_handler_unixfs_dir.go index a6ab7cb55..511066f43 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_dir.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_dir.go @@ -185,7 +185,7 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit var gwURL string // Get gateway hostname and build gateway URL. - if h, ok := r.Context().Value("gw-hostname").(string); ok { + if h, ok := r.Context().Value(requestContextKey("gw-hostname")).(string); ok { gwURL = "//" + h } else { gwURL = "" diff --git a/gateway/core/corehttp/hostname.go b/gateway/core/corehttp/hostname.go index 3d022fc5a..5445740e6 100644 --- a/gateway/core/corehttp/hostname.go +++ b/gateway/core/corehttp/hostname.go @@ -221,7 +221,8 @@ func HostnameOption() ServeOption { if !cfg.Gateway.NoDNSLink && isDNSLinkName(r.Context(), coreAPI, host) { // rewrite path and handle as DNSLink r.URL.Path = "/ipns/" + stripPort(host) + r.URL.Path - childMux.ServeHTTP(w, withHostnameContext(r, host)) + ctx := context.WithValue(r.Context(), requestContextKey("dnslink-hostname"), host) + childMux.ServeHTTP(w, withHostnameContext(r.WithContext(ctx), host)) return } @@ -242,6 +243,8 @@ type wildcardHost struct { spec *config.GatewaySpec } +type requestContextKey string + // Extends request context to include hostname of a canonical gateway root // (subdomain root or dnslink fqdn) func withHostnameContext(r *http.Request, hostname string) *http.Request { @@ -250,7 +253,7 @@ func withHostnameContext(r *http.Request, hostname string) *http.Request { // Host header, subdomain gateways have more comples rules (knownSubdomainDetails) // More: https://github.com/ipfs/dir-index-html/issues/42 // nolint: staticcheck // non-backward compatible change - ctx := context.WithValue(r.Context(), "gw-hostname", hostname) + ctx := context.WithValue(r.Context(), requestContextKey("gw-hostname"), hostname) return r.WithContext(ctx) }