From eefc0508a824935f72b378300660c1b15fda8b9f Mon Sep 17 00:00:00 2001
From: Movitz Sunar
Date: Mon, 1 Jul 2024 03:05:02 +0200
Subject: [PATCH] don't follow preconnect and dns-prefetch links

---
 link_finder.go      | 10 ++++++++++
 link_finder_test.go | 30 ++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/link_finder.go b/link_finder.go
index 43cf934..beee40c 100644
--- a/link_finder.go
+++ b/link_finder.go
@@ -40,6 +40,16 @@ func (f linkFinder) Find(n *html.Node, base *url.URL) map[string]error {
 		_, ok := atomToAttributes[n.DataAtom]
 		return ok
 	}) {
+
+		// https://github.com/raviqqe/muffet/issues/391
+		// preconnect and dns-prefetch hrefs are not http resources
+		if n.DataAtom == atom.Link {
+			rel := scrape.Attr(n, "rel")
+			if rel == "preconnect" || rel == "dns-prefetch" {
+				continue
+			}
+		}
+
 		for _, a := range atomToAttributes[n.DataAtom] {
 			ss := f.parseLinks(n, a)
 
diff --git a/link_finder_test.go b/link_finder_test.go
index 20a3072..7980c5f 100644
--- a/link_finder_test.go
+++ b/link_finder_test.go
@@ -217,6 +217,36 @@ func TestLinkFinderIgnoreMetaTags(t *testing.T) {
 	assert.Len(t, ls, 0)
 }
 
+func TestLinkFinderIgnorePreconnect(t *testing.T) {
+	b, err := url.Parse("https://localhost")
+	assert.Nil(t, err)
+
+	for _, c := range []struct {
+		html      string
+		linkCount int
+	}{
+		{``, 0},
+		{``, 0},
+		{``, 1},
+	} {
+		n, err := html.Parse(strings.NewReader(htmlWithBody(c.html)))
+		assert.Nil(t, err)
+
+		s, e := 0, 0
+
+		for _, err := range newTestLinkFinder().Find(n, b) {
+			if err == nil {
+				s++
+			} else {
+				e++
+			}
+		}
+
+		assert.Equal(t, c.linkCount, s)
+		assert.Equal(t, 0, e)
+	}
+}
+
 func htmlWithBody(b string) string {
 	return fmt.Sprintf(`%v`, b)
 }