Skip to content

Commit

Permalink
WIP _redirects refactored
Browse files Browse the repository at this point in the history
  • Loading branch information
Justin Johnson committed Apr 10, 2022
1 parent 957fc24 commit 65d3b47
Show file tree
Hide file tree
Showing 5 changed files with 343 additions and 18 deletions.
150 changes: 132 additions & 18 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,31 +292,24 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
return
}

// Resolve path to the final DAG node for the ETag
resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath)
switch err {
case nil:
case coreiface.ErrOffline:
webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable)
return
default:
// if Accept is text/html, see if ipfs-404.html is present
if i.servePretty404IfPresent(w, r, contentPath) {
logger.Debugw("serve pretty 404 if present")
return
}

webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusNotFound)
return
}

// Detect when explicit Accept header or ?format parameter are present
responseFormat, formatParams, err := customResponseFormat(r)
if err != nil {
webError(w, "error while processing the Accept header", err, http.StatusBadRequest)
return
}
trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat))

var ok bool
var resolvedPath ipath.Resolved
if responseFormat == "" {
resolvedPath, contentPath, ok = i.handleUnixfsPathResolution(w, r, responseFormat, contentPath, logger)
} else {
resolvedPath, contentPath, ok = i.handleNonUnixfsPathResolution(w, r, responseFormat, contentPath, logger)
}
if !ok {
return
}
trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String()))

// Finish early if client already has matching Etag
Expand Down Expand Up @@ -934,3 +927,124 @@ func (i *gatewayHandler) handledSetHeaders(w http.ResponseWriter, r *http.Reques

return false
}

// handleNonUnixfsPathResolution resolves contentPath for requests that carry
// an explicit response format (the caller uses it when responseFormat is not
// empty). No _redirects or pretty-404 handling is attempted here.
//
// On success it returns the resolved path, the unchanged contentPath and true.
// On failure it writes the error response itself and returns (nil, nil, false):
// 503 when the node is offline, 404 for any other resolution error.
//
// responseFormat and logger are currently unused; they mirror the signature of
// handleUnixfsPathResolution.
func (i *gatewayHandler) handleNonUnixfsPathResolution(w http.ResponseWriter, r *http.Request, responseFormat string, contentPath ipath.Path, logger *zap.SugaredLogger) (ipath.Resolved, ipath.Path, bool) {
	resolved, resolveErr := i.api.ResolvePath(r.Context(), contentPath)
	if resolveErr == nil {
		return resolved, contentPath, true
	}

	// Pick the status code first, then report the failure in one place.
	status := http.StatusNotFound
	if resolveErr == coreiface.ErrOffline {
		status = http.StatusServiceUnavailable
	}
	webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), resolveErr, status)
	return nil, nil, false
}

// handleUnixfsPathResolution resolves contentPath for a UnixFS request.
//
// When the path resolves, it is returned as-is. When the node is offline a 503
// is written. For any other resolution failure, and only if the request has
// origin isolation, a _redirects file is looked up in the root CID path:
//   - if a rule produced a redirect or a 404 response, the response has
//     already been written and (nil, nil, false) is returned;
//   - if a rule produced a rewrite (status 200), the rewritten path is
//     resolved and returned; a failure to resolve it is reported as an
//     internal error rather than retried.
//
// If nothing above handled the request, the legacy ipfs-404.html fallback is
// tried, and finally a plain 404 is written.
//
// The boolean result is true only when a resolved path is returned; when it is
// false the response has already been written and the caller must stop.
// responseFormat is currently unused.
func (i *gatewayHandler) handleUnixfsPathResolution(w http.ResponseWriter, r *http.Request, responseFormat string, contentPath ipath.Path, logger *zap.SugaredLogger) (ipath.Resolved, ipath.Path, bool) {
	resolvedPath, resolveErr := i.api.ResolvePath(r.Context(), contentPath)
	if resolveErr == nil {
		// TODO: I believe for the force option, we might need to short circuit this, and thus we would need to read the redirects file first
		return resolvedPath, contentPath, true
	}
	if resolveErr == coreiface.ErrOffline {
		webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), resolveErr, http.StatusServiceUnavailable)
		return nil, nil, false
	}

	// The path did not resolve. _redirects is only consulted for requests
	// with origin isolation.
	if hasOriginIsolation(r) {
		redirectsFile, lookupErr := i.getRedirectsFile(r)
		if lookupErr == nil {
			// _redirects file exists, so parse it and apply the first matching rule.
			redirected, newPath, ruleErr := i.handleRedirectsFile(w, r, redirectsFile, logger)
			if ruleErr != nil {
				internalWebError(w, fmt.Errorf("trouble processing _redirects file at %q: %w", redirectsFile.String(), ruleErr))
				return nil, nil, false
			}
			if redirected {
				// Response already written by handleRedirectsFile.
				return nil, nil, false
			}
			if newPath != "" {
				// 200 is treated as a rewrite: resolve the rewritten path instead.
				contentPath = ipath.New(newPath)
				rewritten, rewriteErr := i.api.ResolvePath(r.Context(), contentPath)
				if rewriteErr != nil {
					internalWebError(w, rewriteErr)
					return nil, nil, false
				}
				logger.Debugf("_redirects: 200 rewrite. newPath=%v", newPath)
				return rewritten, contentPath, true
			}
		} else if _, missing := lookupErr.(resolver.ErrNoLink); !missing {
			// A missing _redirects file (ErrNoLink) is not an error; anything else is.
			// TODO(JJ): During tests we get multibase.ErrUnsupportedEncoding
			// This comes from multibase and I assume is due to a fake or otherwise bad CID being in the test.
			internalWebError(w, lookupErr)
			return nil, nil, false
		}
	}

	// if Accept is text/html, see if ipfs-404.html is present
	// This logic isn't documented and will likely be removed at some point.
	// Any 404 logic in _redirects above will have already run by this time, so it's really an extra fall back
	if i.servePretty404IfPresent(w, r, contentPath) {
		logger.Debugw("serve pretty 404 if present")
		return nil, nil, false
	}

	// Fallback: report the original resolution error as a 404.
	webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), resolveErr, http.StatusNotFound)
	return nil, nil, false
}

// serve404 writes the UnixFS file at content404Path as the body of an HTTP 404
// response with Content-Type text/html.
//
// It returns an error, without having written any response, if the path cannot
// be resolved, the node cannot be fetched, the node is not a regular file, or
// its size cannot be determined. After the header has been written, any error
// returned comes from copying the file body to w.
func (i *gatewayHandler) serve404(w http.ResponseWriter, r *http.Request, content404Path ipath.Path) error {
	resolved404Path, err := i.api.ResolvePath(r.Context(), content404Path)
	if err != nil {
		return err
	}

	node, err := i.api.Unixfs().Get(r.Context(), resolved404Path)
	if err != nil {
		return err
	}
	defer node.Close()

	f, ok := node.(files.File)
	if !ok {
		return fmt.Errorf("could not convert node for 404 page to file")
	}

	size, err := f.Size()
	if err != nil {
		// Keep the underlying cause so callers can inspect or log it.
		return fmt.Errorf("could not get size of 404 page: %w", err)
	}

	log.Debugw("using _redirects 404 file", "path", content404Path)
	w.Header().Set("Content-Type", "text/html")
	w.Header().Set("Content-Length", strconv.FormatInt(size, 10))
	w.WriteHeader(http.StatusNotFound)
	// Copy exactly the advertised Content-Length.
	_, err = io.CopyN(w, f, size)
	return err
}
139 changes: 139 additions & 0 deletions core/corehttp/gateway_handler_unixfs__redirects.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package corehttp

import (
"errors"
"fmt"
"net/http"
gopath "path"
"strings"

files "github.com/ipfs/go-ipfs-files"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"github.com/tj/go-redirects"
"github.com/ucarion/urlpath"
"go.uber.org/zap"
)

// handleRedirectsFile loads the _redirects file at redirectsFilePath, parses
// its rules and applies the first rule whose From pattern matches the request
// path (the part of r.URL.Path after the first two segments, e.g. after
// /ipfs/<cid>).
//
// Results:
//   - (false, "", nil): no rule matched, or the request path has no segment
//     beyond the root; nothing was written to w.
//   - (false, newPath, nil): a status-200 rule matched; newPath is the
//     root-prefixed rewrite target the caller should resolve instead.
//   - (true, target, err): a response was attempted on w. For status 404 the
//     root-prefixed target is served via serve404 and err is its result; for
//     any other status an HTTP redirect to the substituted target is written.
//   - (false, "", err): the _redirects file could not be fetched, is not a
//     file, or could not be parsed.
func (i *gatewayHandler) handleRedirectsFile(w http.ResponseWriter, r *http.Request, redirectsFilePath ipath.Resolved, logger *zap.SugaredLogger) (bool, string, error) {
	// Convert the path into a file node
	node, err := i.api.Unixfs().Get(r.Context(), redirectsFilePath)
	if err != nil {
		return false, "", fmt.Errorf("could not get _redirects node: %w", err)
	}
	defer node.Close()

	// Convert the node into a file
	f, ok := node.(files.File)
	if !ok {
		return false, "", fmt.Errorf("could not convert _redirects node to file")
	}

	// Parse redirect rules from file
	redirectRules, err := redirects.Parse(f)
	if err != nil {
		return false, "", fmt.Errorf("could not parse redirect rules: %w", err)
	}
	logger.Debugf("redirectRules=%v", redirectRules)

	// Paths look like /<namespace>/<root>/<rest...>; without a <rest> there is
	// nothing to match rules against.
	pathParts := strings.Split(r.URL.Path, "/")
	if len(pathParts) <= 3 {
		return false, "", nil
	}
	rootPath := strings.Join(pathParts[:3], "/")
	// Path relative to the root, without a trailing slash.
	urlPath := strings.TrimSuffix("/"+strings.Join(pathParts[3:], "/"), "/")
	logger.Debugf("_redirects: urlPath=%v", urlPath)

	for _, rule := range redirectRules {
		// Compare without trailing slashes on either side.
		from := strings.TrimSuffix(rule.From, "/")
		logger.Debugf("_redirects: fromPath=%v", from)
		fromPath := urlpath.New(from)
		match, ok := fromPath.Match(urlPath)
		if !ok {
			continue
		}

		// We have a match! Perform substitutions.
		toPath := replaceSplat(replacePlaceholders(rule.To, match), match)
		logger.Debugf("_redirects: toPath=%v", toPath)

		switch rule.Status {
		case 200:
			// Rewrite: hand the root-prefixed, substituted target back to the
			// caller. (Using rule.To here would drop the substitutions.)
			return false, rootPath + toPath, nil
		case 404:
			// Serve the substituted target as the 404 page body.
			toPath = rootPath + toPath
			err = i.serve404(w, r, ipath.New(toPath))
			return true, toPath, err
		default:
			// Any other status is sent as an HTTP redirect.
			http.Redirect(w, r, toPath, rule.Status)
			return true, toPath, nil
		}
	}

	// No redirects matched
	return false, "", nil
}

// replacePlaceholders substitutes every ":name" occurrence in to with the
// value captured for that name in match.Params and returns the result.
//
// NOTE(review): substitutions are applied one name at a time in map iteration
// order, which Go leaves unspecified; if one parameter name is a prefix of
// another (":id" vs ":idx") the outcome can vary between calls.
func replacePlaceholders(to string, match urlpath.Match) string {
	// Ranging over an empty or nil map simply does nothing.
	for name, captured := range match.Params {
		placeholder := ":" + name
		to = strings.ReplaceAll(to, placeholder, captured)
	}
	return to
}

// replaceSplat substitutes every ":splat" occurrence in to with the trailing
// portion of the matched path (match.Trailing) and returns the result.
func replaceSplat(to string, match urlpath.Match) string {
	const splatToken = ":splat"
	trailing := match.Trailing
	return strings.ReplaceAll(to, splatToken, trailing)
}

// getRedirectsFile returns the resolved path of the _redirects file that sits
// directly under the root CID path of the request.
//
// r.URL.Path is expected to be the full ipfs path to the requested resource,
// regardless of whether path or subdomain resolution is used. An error is
// returned if the root cannot be derived from it or if the _redirects path
// does not resolve.
func (i *gatewayHandler) getRedirectsFile(r *http.Request) (ipath.Resolved, error) {
	root, rootErr := getRootPath(r.URL.Path)
	if rootErr != nil {
		return nil, rootErr
	}

	redirectsPath := ipath.New(gopath.Join(root, "_redirects"))
	resolved, resolveErr := i.api.ResolvePath(r.Context(), redirectsPath)
	if resolveErr != nil {
		return nil, resolveErr
	}
	return resolved, nil
}

// getRootPath returns the root CID path (ipfsPathPrefix joined with the first
// segment after the prefix) for the given path.
//
// It returns an error when path does not start with ipfsPathPrefix, when the
// cleaned path is too shallow to contain a root segment, or when the root
// segment is empty, "." or "..". The last case covers inputs such as
// "/ipfs//x" that pass the depth check on the cleaned path but would otherwise
// yield a root without a CID.
func getRootPath(path string) (string, error) {
	errNoRoot := errors.New("failed to get root CID path")

	if !strings.HasPrefix(path, ipfsPathPrefix) || strings.Count(gopath.Clean(path), "/") < 2 {
		return "", errNoRoot
	}

	parts := strings.Split(path, "/")
	if len(parts) < 3 {
		return "", errNoRoot
	}
	switch parts[2] {
	case "", ".", "..":
		// Not a usable root segment; joining it would produce a bogus root.
		return "", errNoRoot
	}
	return gopath.Join(ipfsPathPrefix, parts[2]), nil
}

// hasOriginIsolation reports whether the request context carries a string
// value under the "gw-hostname" or "dnslink-hostname" key.
func hasOriginIsolation(r *http.Request) bool {
	ctx := r.Context()
	if _, isGateway := ctx.Value("gw-hostname").(string); isGateway {
		return true
	}
	_, isDNSLink := ctx.Value("dnslink-hostname").(string)
	return isDNSLink
}
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ require (
github.com/prometheus/client_golang v1.11.0
github.com/stretchr/testify v1.7.0
github.com/syndtr/goleveldb v1.0.0
github.com/tj/go-redirects v0.0.0-20200911105812-fd1ba1020b37 // indirect
github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb // indirect
github.com/whyrusleeping/go-sysinfo v0.0.0-20190219211824-4a357d4b90b1
github.com/whyrusleeping/multiaddr-filter v0.0.0-20160516205228-e903e4adabd7
go.opencensus.io v0.23.0
Expand All @@ -121,4 +123,6 @@ require (
golang.org/x/sys v0.0.0-20211025112917-711f33c9992c
)

replace github.com/tj/go-redirects => ../go-redirects

go 1.16
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1368,9 +1368,14 @@ github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpP
github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA=
github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e h1:T5PdfK/M1xyrHwynxMIVMWLS7f/qHwfslZphxtGnw7s=
github.com/texttheater/golang-levenshtein v0.0.0-20180516184445-d188e65d659e/go.mod h1:XDKHRm5ThF8YJjx001LtgelzsoaEcvnA7lVWz9EeX3g=
github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pvk=
github.com/tj/go-redirects v0.0.0-20200911105812-fd1ba1020b37 h1:K11tjwz8zTTSZkz4TUjfLN+y8uJWP38BbyPqZ2yB/Yk=
github.com/tj/go-redirects v0.0.0-20200911105812-fd1ba1020b37/go.mod h1:E0E2H2gQA+uoi27VCSU+a/BULPtadQA78q3cpTjZbZw=
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDHS3lPnIRmfVJ5Sxy3ao2SIdysLQ=
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM=
github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb h1:Ywfo8sUltxogBpFuMOFRrrSifO788kAFxmvVw31PtQQ=
github.com/ucarion/urlpath v0.0.0-20200424170820-7ccc79b76bbb/go.mod h1:ikPs9bRWicNw3S7XpJ8sK/smGwU9WcSVU3dy9qahYBM=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
Expand Down Expand Up @@ -1951,6 +1956,7 @@ gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
grpc.go4.org v0.0.0-20170609214715-11d0a25b4919/go.mod h1:77eQGdRu53HpSqPFJFmuJdjuHRquDANNeA4x7B8WQ9o=
Expand Down
Loading

0 comments on commit 65d3b47

Please sign in to comment.