Skip to content

Commit

Permalink
google CIDRs loading from official URL
Browse files Browse the repository at this point in the history
  • Loading branch information
pdazcom committed Sep 5, 2024
1 parent e99f3c9 commit fbf4ba1
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 50 deletions.
78 changes: 64 additions & 14 deletions botdetector.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ import (
"regexp"
"strings"
"fmt"
"os"
"encoding/json"
"sync"
"time"
)

// DNSResolver is an interface for DNS lookups
Expand Down Expand Up @@ -39,14 +40,13 @@ type Config struct {
ExcludeStatic bool `json:"excludeStatic,omitempty"`
StaticExtensions []string `json:"staticExtensions,omitempty"`
GoogleByIP bool `json:"googleByIP,omitempty"`
GoogleCIDRFile string `json:"googleCIDRFile,omitempty"`
IncludeGoogleIPs []string `json:"includeGoogleIPs,omitempty"`
}

// CreateConfig returns a Config pre-populated with the plugin defaults:
// BotTag "true" and the built-in list of static file extensions.
// NOTE(review): this span comes from a rendered diff; the GoogleCIDRFile default
// below appears to be the line this commit removes — confirm against the actual file.
func CreateConfig() *Config {
return &Config{
BotTag: "true",
StaticExtensions: []string{".css", ".js", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".woff", ".woff2", ".ttf", ".ico"},
GoogleCIDRFile: "google_cidr.json",
}
}

Expand All @@ -61,7 +61,9 @@ type BotMiddleware struct {
staticExtensions []string
googleByIP bool
googleCIDR []*net.IPNet
includeGoogleIPs []*net.IPNet
dnsResolver DNSResolver
mu sync.RWMutex
}

func New(ctx context.Context, next http.Handler, config *Config, name string) (http.Handler, error) {
Expand All @@ -79,10 +81,21 @@ func New(ctx context.Context, next http.Handler, config *Config, name string) (h
}

if middleware.googleByIP {
err := middleware.loadGoogleCIDR(config.GoogleCIDRFile)
err := middleware.loadGoogleCIDR()
if err != nil {
return nil, fmt.Errorf("failed to load Google CIDR list: %v", err)
}

for _, includeIP := range config.IncludeGoogleIPs {
_, cidr, err := net.ParseCIDR(includeIP)
if err != nil {
continue
}
middleware.includeGoogleIPs = append(middleware.includeGoogleIPs, cidr)
}

// start periodic loading of CIDR from URL
go middleware.periodicLoad(1 * time.Hour)
}

return middleware, nil
Expand Down Expand Up @@ -143,36 +156,73 @@ func (m *BotMiddleware) isRefererSameHost(req *http.Request) bool {
return refererHost == requestHost
}

// loadGoogleCIDR fetches Google's published IP ranges, parses every ipv4Prefix and
// ipv6Prefix entry into a *net.IPNet, and replaces m.googleCIDR with the result
// while holding m.mu. Entries that fail to parse are skipped. It returns the error
// from the HTTP request or from JSON decoding, if any; in that case m.googleCIDR
// is left untouched.
// NOTE(review): this span comes from a rendered diff, so lines of the previous
// file-based loader (os.Open, cidrList, the append to m.googleCIDR) are interleaved
// with the new URL-based loader — confirm against the actual file.
func (m *BotMiddleware) loadGoogleCIDR(filePath string) error {
file, err := os.Open(filePath)
func (m *BotMiddleware) loadGoogleCIDR() error {
// NOTE(review): http.Get goes through http.DefaultClient, which sets no timeout,
// and resp.StatusCode is never checked before decoding — confirm this is acceptable.
resp, err := http.Get("https://www.gstatic.com/ipranges/goog.json")
if err != nil {
return err
}
defer file.Close()
defer resp.Body.Close()

var cidrList []string
if err := json.NewDecoder(file).Decode(&cidrList); err != nil {
// Only the "prefixes" array of the response is decoded; the loop below handles
// an IPv4 prefix, an IPv6 prefix, or both on each entry.
var data struct {
Prefixes []struct {
IPPrefix string `json:"ipv4Prefix"`
IP6Prefix string `json:"ipv6Prefix"`
} `json:"prefixes"`
}
if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
return err
}

for _, cidrStr := range cidrList {
_, cidr, err := net.ParseCIDR(cidrStr)
if err != nil {
continue
// Build the new list in a local slice so m.googleCIDR is replaced in one step.
var parsedCIDR []*net.IPNet
for _, prefix := range data.Prefixes {
if prefix.IPPrefix != "" {
_, cidr, err := net.ParseCIDR(prefix.IPPrefix)
if err != nil {
// NOTE(review): this continue also skips the same entry's ipv6Prefix, if present.
continue
}
parsedCIDR = append(parsedCIDR, cidr)
}
if prefix.IP6Prefix != "" {
_, cidr, err := net.ParseCIDR(prefix.IP6Prefix)
if err != nil {
continue
}
parsedCIDR = append(parsedCIDR, cidr)
}
m.googleCIDR = append(m.googleCIDR, cidr)
}

// Replace the CIDR list under the write lock.
m.mu.Lock()
m.googleCIDR = parsedCIDR
m.mu.Unlock()

return nil
}

// periodicLoad refreshes the Google CIDR list once per interval. It is meant
// to run in its own goroutine and does not return unless interval is
// non-positive.
//
// New performs the initial load synchronously before starting this loop, so
// the loop waits one full interval before its first refresh instead of
// issuing a duplicate request at startup.
func (m *BotMiddleware) periodicLoad(interval time.Duration) {
	if interval <= 0 {
		// time.Sleep returns immediately for non-positive durations, which
		// would turn this loop into a tight stream of HTTP requests.
		return
	}

	for {
		time.Sleep(interval)

		// A failed refresh is deliberately ignored: loadGoogleCIDR only
		// replaces the list after a successful fetch and decode, so the
		// previously loaded ranges stay in effect until the next attempt.
		_ = m.loadGoogleCIDR()
	}
}

// isGoogleBotByIP reports whether ip lies inside any known Google range. The
// list loaded from Google is consulted first, then the extra ranges taken
// from the configuration. The read lock is held for the whole lookup so a
// concurrent reload cannot swap the list mid-scan.
func (m *BotMiddleware) isGoogleBotByIP(ip string) bool {
	m.mu.RLock()
	defer m.mu.RUnlock()

	addr := net.ParseIP(ip)
	for _, ranges := range [][]*net.IPNet{m.googleCIDR, m.includeGoogleIPs} {
		for _, network := range ranges {
			if network.Contains(addr) {
				return true
			}
		}
	}

	return false
}

Expand Down
111 changes: 77 additions & 34 deletions botdetector_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package botdetector

import (
"context"
"errors"
"net"
"net/http"
"net/http/httptest"
"testing"
"bytes"
"io"
)

// MockDNSResolver is a stateless stand-in DNS resolver for the tests in this file.
// NOTE(review): presumably it satisfies the DNSResolver interface — confirm.
type MockDNSResolver struct{}
Expand Down Expand Up @@ -43,6 +46,19 @@ func (r *MockDNSResolver) LookupIP(hostname string) ([]net.IP, error) {
return nil, errors.New("unknown host")
}

// MockTransport is a canned http.RoundTripper used to imitate an HTTP
// server's reply in tests. Response is handed back for every request unless
// Err is set, in which case Err is returned instead.
type MockTransport struct {
	Response *http.Response
	Err      error
}

// RoundTrip ignores req and returns the preconfigured outcome: (nil, Err)
// when Err is non-nil, otherwise (Response, nil).
func (m *MockTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	if m.Err == nil {
		return m.Response, nil
	}
	return nil, m.Err
}

func TestIsSearchBot(t *testing.T) {
tests := []struct {
userAgent string
Expand Down Expand Up @@ -308,44 +324,71 @@ func TestServeHTTP_WithStaticFileAndExcludeStaticDisabled(t *testing.T) {
}

// TestGoogleBotByIP checks that a request is redirected to BotsTo or OthersTo
// depending on whether its RemoteAddr falls inside the Google ranges served by a
// mocked HTTP transport or inside the configured IncludeGoogleIPs.
// NOTE(review): this span comes from a rendered diff, so lines of the previous
// file-based test (test_cidr.json, req2/recorder2) are interleaved with the new
// table-driven test — confirm against the actual file.
func TestGoogleBotByIP(t *testing.T) {
middleware := &BotMiddleware{
botsTo: "bots.com",
othersTo: "others.com",
googleByIP: true,
botTag: "true",
}

// load CIDR ranges from JSON
err := middleware.loadGoogleCIDR("test_cidr.json")
if err != nil {
t.Fatalf("Failed to load Google CIDR list: %v", err)
// create a mock response
mockResponse := `{
"prefixes": [
{"ipv4Prefix": "8.8.8.0/24"},
{"ipv4Prefix": "64.233.160.0/19"},
{"ipv4Prefix": "66.249.80.0/20"},
{"ipv4Prefix": "72.14.192.0/18"},
{"ipv4Prefix": "209.85.128.0/17"},
{"ipv6Prefix": "2001:4860:4860::8888/32"}
]
}`

// mocking the HTTP client
mockTransport := &MockTransport{
Response: &http.Response{
StatusCode: 200,
Header: make(http.Header),
Body: http.NoBody,
},
}
mockTransport.Response.Body = io.NopCloser(bytes.NewReader([]byte(mockResponse)))

// route the loader's HTTP request through the mock transport
oldTransport := http.DefaultTransport
http.DefaultTransport = mockTransport
defer func() { http.DefaultTransport = oldTransport }() // restore the original transport after the test

// NOTE(review): the error from New is discarded, and with GoogleByIP set New also
// starts the periodic reload goroutine, which keeps running after this test ends.
middleware, _ := New(
context.Background(), http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}),
&Config{
BotsTo: "bots.com",
OthersTo: "others.com",
GoogleByIP: true,
BotTag: "true",
IncludeGoogleIPs: []string{"172.18.0.0/24", "192.168.1.1/32"},
},
"botdetector",
)

tests := []struct {
ip string
expectedLocation string
expectedStatus int
}{
{"64.233.160.1:12345", "http://bots.com", http.StatusFound}, // in google range
{"1.1.1.1:12345", "http://others.com", http.StatusFound}, // out of google range
{"172.18.0.1:12345", "http://bots.com", http.StatusFound}, // in include google range
{"192.168.1.1:12346", "http://bots.com", http.StatusFound}, // in include google range
}

// create a request with an IP from the Google range
req := httptest.NewRequest("GET", "http://localhost/", nil)
req.RemoteAddr = "64.233.160.1:12345" // IP inside the Google range

recorder := httptest.NewRecorder()
middleware.ServeHTTP(recorder, req)

// check that the request was redirected to botsTo
if location := recorder.Header().Get("Location"); location != "http://bots.com/" {
t.Errorf("Expected redirect to http://bots.com/, got %v", location)
}
if status := recorder.Code; status != http.StatusFound {
t.Errorf("Expected status 302 Found, got %v", status)
}
for _, test := range tests {

req2 := httptest.NewRequest("GET", "http://example.com", nil)
req2.RemoteAddr = "1.1.1.1:12345" // IP not in Google range
req := httptest.NewRequest("GET", "http://localhost", nil)
req.RemoteAddr = test.ip

recorder2 := httptest.NewRecorder()
middleware.ServeHTTP(recorder2, req2)
recorder := httptest.NewRecorder()
middleware.ServeHTTP(recorder, req)

if location := recorder2.Header().Get("Location"); location != "http://others.com" {
t.Errorf("Expected redirect to http://others.com, got %v", location)
}
if status := recorder2.Code; status != http.StatusFound {
t.Errorf("Expected status 302 Found, got %v", status)
}
// check that the request was redirected to expected location
// NOTE(review): the failure messages below hard-code "http://bots.com/" and
// "302 Found" instead of printing test.expectedLocation / test.expectedStatus.
if location := recorder.Header().Get("Location"); location != test.expectedLocation {
t.Errorf("Expected redirect to http://bots.com/, got %v", location)
}
if status := recorder.Code; status != test.expectedStatus {
t.Errorf("Expected status 302 Found, got %v", status)
}
}
}
4 changes: 2 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ experimental:
plugins:
botdetector:
modulename = "github.com/pdazcom/botdetector"
version = "v0.4.0"
version = "v0.4.1"

entryPoints:
http:
Expand All @@ -45,7 +45,7 @@ This plugin supports these configuration (all of them optional):
* `permanent`: sets the redirect type: 'true' - redirect code = 301, 'false' - 302. Default: false.
* `botTag`: header `X-SearchBot-Detected` value to mark search bots requests. Default: "true"
* `googleByIP`: sets whether to detect the Google bot by IP only; user-agent and rDNS are ignored. Default: "false"
* `googleCIDRFile`: file with a list of CIDRs by which to check whether an IP belongs to Google (JSON array). Default: "google_cidr.json"
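* `includeGoogleIPs`: list of additional CIDRs (e.g. `"192.168.1.1/32"`) whose addresses are also treated as Google bot IPs when `googleByIP` is enabled. Entries that are not valid CIDRs are ignored. Default: empty.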

**Note**: Leave `botsTo` and `othersTo` empty to only mark bot requests with the `X-SearchBot-Detected` header without redirects.

Expand Down

0 comments on commit fbf4ba1

Please sign in to comment.