From aaf9ff90a18d76be96c213ab037625dffed5c6ef Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 29 Mar 2023 20:01:53 +0200 Subject: [PATCH] fix: DefaultSaturnCarRequestTimeout 19s is not enough for fetching CAR stream of unknown length; every bigger request was failing. If we need to pick some ceiling, 30m sounds like a good starting point (this is when a CAR stream timed out on the old ipfs.io). --- caboose.go | 12 ++++++++++-- cmd/caboose/main.go | 2 +- fetcher.go | 20 +++++++++++++++++--- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/caboose.go b/caboose.go index beedfce..0743729 100644 --- a/caboose.go +++ b/caboose.go @@ -75,11 +75,19 @@ type Config struct { MaxNCoolOff int } +const DefaultLoggingInterval = 5 * time.Second +const DefaultSaturnLoggerRequestTimeout = 1 * time.Minute + +const DefaultSaturnOrchestratorRequestTimeout = 19 * time.Second + +const DefaultSaturnBlockRequestTimeout = 19 * time.Second +const DefaultSaturnCarRequestTimeout = 30 * time.Minute + const DefaultMaxRetries = 3 const DefaultPoolFailureDownvoteDebounce = 1 * time.Minute const DefaultPoolMembershipDebounce = 3 * DefaultPoolRefreshInterval const DefaultPoolLowWatermark = 5 -const DefaultSaturnRequestTimeout = 19 * time.Second + const maxBlockSize = 4194305 // 4 Mib + 1 byte const DefaultOrchestratorEndpoint = "https://orchestrator.strn.pl/nodes/nearby?count=1000" const DefaultPoolRefreshInterval = 5 * time.Minute @@ -188,7 +196,7 @@ func NewCaboose(config *Config) (*Caboose, error) { if c.config.SaturnClient == nil { c.config.SaturnClient = &http.Client{ - Timeout: DefaultSaturnRequestTimeout, + Timeout: DefaultSaturnCarRequestTimeout, } } if c.config.OrchestratorEndpoint == nil { diff --git a/cmd/caboose/main.go b/cmd/caboose/main.go index b0abb4b..d2dd65d 100644 --- a/cmd/caboose/main.go +++ b/cmd/caboose/main.go @@ -54,7 +54,7 @@ func main1() int { LoggingEndpoint: *le, LoggingClient: http.DefaultClient, - LoggingInterval: 5 * time.Second, + 
LoggingInterval: DefaultLoggingInterval, DoValidation: true, PoolRefresh: 5 * time.Minute, diff --git a/fetcher.go b/fetcher.go index 6e2c8a8..8846ac8 100644 --- a/fetcher.go +++ b/fetcher.go @@ -81,6 +81,11 @@ func (p *pool) fetchResource(ctx context.Context, from string, resource string, isCacheHit := false networkError := "" + isBlockRequest := false + if mime == "application/vnd.ipld.raw" { + isBlockRequest = true + } + defer func() { var ttfbMs int64 durationSecs := time.Since(start).Seconds() @@ -92,7 +97,7 @@ func (p *pool) fetchResource(ctx context.Context, from string, resource string, ttfbMs = fb.Sub(start).Milliseconds() fetchTTFBPerBlockPerPeerSuccessMetric.Observe(float64(ttfbMs)) // track individual block metrics separately - if mime == "application/vnd.ipld.raw" { + if isBlockRequest { fetchDurationPerBlockPerPeerSuccessMetric.Observe(float64(response_success_end.Sub(start).Milliseconds())) } else { fetchDurationPerCarPerPeerSuccessMetric.Observe(float64(response_success_end.Sub(start).Milliseconds())) @@ -100,7 +105,7 @@ func (p *pool) fetchResource(ctx context.Context, from string, resource string, fetchSpeedPerBlockPerPeerMetric.Observe(float64(received) / float64(durationMs)) } else { fetchTTFBPerBlockPerPeerFailureMetric.Observe(float64(ttfbMs)) - if mime == "application/vnd.ipld.raw" { + if isBlockRequest { fetchDurationPerBlockPerPeerFailureMetric.Observe(float64(time.Since(start).Milliseconds())) } else { fetchDurationPerCarPerPeerFailureMetric.Observe(float64(time.Since(start).Milliseconds())) @@ -145,7 +150,16 @@ func (p *pool) fetchResource(ctx context.Context, from string, resource string, } }() - reqCtx, cancel := context.WithTimeout(ctx, DefaultSaturnRequestTimeout) + // TODO: Ideally, we would have additional "PerRequestInactivityTimeout" + // which is the amount of time without any NEW data from the server, but + // that can be added later. We need both because a slow trickle of data + // could take a large amount of time. 
+ requestTimeout := DefaultSaturnCarRequestTimeout + if isBlockRequest { + requestTimeout = DefaultSaturnBlockRequestTimeout + } + + reqCtx, cancel := context.WithTimeout(ctx, requestTimeout) defer cancel() req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, reqUrl, nil) if err != nil {