Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 add retry loop to graphQL commit queries which timeout on large github repos #3680

Merged
merged 7 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 24 additions & 24 deletions clients/githubrepo/graphql.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
issueCommentsToAnalyze = 30
reviewsToAnalyze = 30
labelsToAnalyze = 30

spencerschrock marked this conversation as resolved.
Show resolved Hide resolved
// https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api#node-limit
defaultPageLimit = 100
retryLimit = 3
)

//nolint:govet
Expand Down Expand Up @@ -155,28 +159,35 @@
}

func populateCommits(handler *graphqlHandler, vars map[string]interface{}) ([]clients.Commit, error) {
var allCommits []clients.Commit
var commitsLeft githubv4.Int
var commits []clients.Commit
commitsLeft, ok := vars["commitsToAnalyze"].(githubv4.Int)
if !ok {
return nil, nil
return nil, sce.WithMessage(sce.ErrScorecardInternal, "unexpected type")

Check warning on line 165 in clients/githubrepo/graphql.go

View check run for this annotation

Codecov / codecov/patch

clients/githubrepo/graphql.go#L165

Added line #L165 was not covered by tests
}
for vars["commitsToAnalyze"] = githubv4.Int(100); commitsLeft > 0; commitsLeft = commitsLeft - 100 {
if commitsLeft < 100 {
vars["commitsToAnalyze"] = commitsLeft
}
err := handler.client.Query(handler.ctx, handler.data, vars)
if err != nil {
return nil, fmt.Errorf("failed to populate commits: %w", err)
commitsRequested := min(defaultPageLimit, commitsLeft)
var retries int
for commitsLeft > 0 {
vars["commitsToAnalyze"] = commitsRequested
if err := handler.client.Query(handler.ctx, handler.data, vars); err != nil {
// 502 usually indicate timeouts, where we're requesting too much data
// so make our requests smaller and try again
if retries < retryLimit && strings.Contains(err.Error(), "502 Bad Gateway") {
retries++
commitsRequested /= 2
continue
}
return nil, sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("githubv4.Query: %v", err))
}
vars["historyCursor"] = handler.data.Repository.Object.Commit.History.PageInfo.EndCursor
tmp, err := commitsFrom(handler.data, handler.repourl.owner, handler.repourl.repo)
if err != nil {
return nil, fmt.Errorf("failed to populate commits: %w", err)
}
allCommits = append(allCommits, tmp...)
commits = append(commits, tmp...)
commitsLeft -= commitsRequested
commitsRequested = min(commitsRequested, commitsLeft)

Check warning on line 188 in clients/githubrepo/graphql.go

View check run for this annotation

Codecov / codecov/patch

clients/githubrepo/graphql.go#L186-L188

Added lines #L186 - L188 were not covered by tests
}
return allCommits, nil
return commits, nil

Check warning on line 190 in clients/githubrepo/graphql.go

View check run for this annotation

Codecov / codecov/patch

clients/githubrepo/graphql.go#L190

Added line #L190 was not covered by tests
}

func (handler *graphqlHandler) setup() error {
Expand All @@ -194,18 +205,7 @@
"commitExpression": githubv4.String(commitExpression),
"historyCursor": (*githubv4.String)(nil),
}
// if NumberOfCommits set to < 99 we are required by the graphql to page by 100 commits.
if handler.commitDepth > 99 {
handler.commits, handler.errSetup = populateCommits(handler, vars)
handler.issues = issuesFrom(handler.data)
handler.archived = bool(handler.data.Repository.IsArchived)
return
}
if err := handler.client.Query(handler.ctx, handler.data, vars); err != nil {
handler.errSetup = sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("githubv4.Query: %v", err))
return
}
handler.commits, handler.errSetup = commitsFrom(handler.data, handler.repourl.owner, handler.repourl.repo)
handler.commits, handler.errSetup = populateCommits(handler, vars)
handler.issues = issuesFrom(handler.data)
handler.archived = bool(handler.data.Repository.IsArchived)
})
Expand Down
54 changes: 54 additions & 0 deletions clients/githubrepo/graphql_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright 2023 OpenSSF Scorecard Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package githubrepo

import (
"context"
"net/http"
"testing"

"github.com/shurcooL/githubv4"
)

type badGatewayRoundTripper struct {
requestCounter *int
}

func (b badGatewayRoundTripper) RoundTrip(*http.Request) (*http.Response, error) {
*b.requestCounter += 1
return &http.Response{
StatusCode: http.StatusBadGateway,
Status: "502 Bad Gateway",
}, nil
}

func Test_getCommits_retry(t *testing.T) {
var nRequests int
rt := badGatewayRoundTripper{requestCounter: &nRequests}
handler := graphqlHandler{
client: githubv4.NewClient(&http.Client{
Transport: rt,
}),
}
handler.init(context.Background(), &repoURL{}, 1)
_, err := handler.getCommits()
if err == nil {
t.Error("expected error")
}
want := retryLimit + 1 // 1 represents the initial request
if *rt.requestCounter != want {
t.Errorf("wanted %d retries, got %d", want, *rt.requestCounter)
}
}
Loading