Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

On open repository open common cat file batch and batch-check #15667

Merged
merged 12 commits into from
May 10, 2021
27 changes: 18 additions & 9 deletions modules/context/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package context

import (
"context"
"fmt"
"io/ioutil"
"net/url"
Expand Down Expand Up @@ -393,7 +394,7 @@ func RepoIDAssignment() func(ctx *Context) {
}

// RepoAssignment returns a middleware to handle repository assignment
func RepoAssignment(ctx *Context) {
func RepoAssignment(ctx *Context) (cancel context.CancelFunc) {
var (
owner *models.User
err error
Expand Down Expand Up @@ -529,12 +530,12 @@ func RepoAssignment(ctx *Context) {
ctx.Repo.GitRepo = gitRepo

// We opened it, we should close it
defer func() {
cancel = func() {
// If it's been set to nil then assume someone else has closed it.
if ctx.Repo.GitRepo != nil {
ctx.Repo.GitRepo.Close()
}
}()
}

// Stop at this point when the repo is empty.
if ctx.Repo.Repository.IsEmpty {
Expand Down Expand Up @@ -619,6 +620,7 @@ func RepoAssignment(ctx *Context) {
ctx.Data["GoDocDirectory"] = prefix + "{/dir}"
ctx.Data["GoDocFile"] = prefix + "{/dir}/{file}#L{line}"
}
return
}

// RepoRefType type of repo reference
Expand All @@ -643,7 +645,7 @@ const (

// RepoRef handles repository reference names when the ref name is not
// explicitly given
func RepoRef() func(*Context) {
func RepoRef() func(*Context) context.CancelFunc {
// since no ref name is explicitly specified, ok to just use branch
return RepoRefByType(RepoRefBranch)
}
Expand Down Expand Up @@ -722,8 +724,8 @@ func getRefName(ctx *Context, pathType RepoRefType) string {

// RepoRefByType handles repository reference name for a specific type
// of repository reference
func RepoRefByType(refType RepoRefType) func(*Context) {
return func(ctx *Context) {
func RepoRefByType(refType RepoRefType) func(*Context) context.CancelFunc {
return func(ctx *Context) (cancel context.CancelFunc) {
// Empty repository does not have reference information.
if ctx.Repo.Repository.IsEmpty {
return
Expand All @@ -742,12 +744,12 @@ func RepoRefByType(refType RepoRefType) func(*Context) {
return
}
// We opened it, we should close it
defer func() {
cancel = func() {
// If it's been set to nil then assume someone else has closed it.
if ctx.Repo.GitRepo != nil {
ctx.Repo.GitRepo.Close()
}
}()
}
}

// Get default branch.
Expand Down Expand Up @@ -841,6 +843,7 @@ func RepoRefByType(refType RepoRefType) func(*Context) {
return
}
ctx.Data["CommitsCount"] = ctx.Repo.CommitsCount
return
}
}

Expand Down Expand Up @@ -899,12 +902,18 @@ func (ctx *Context) IssueTemplatesFromDefaultBranch() []api.IssueTemplate {
log.Debug("DataAsync: %v", err)
continue
}
defer r.Close()
closed := false
defer func() {
if !closed {
_ = r.Close()
}
}()
data, err := ioutil.ReadAll(r)
if err != nil {
log.Debug("ReadAll: %v", err)
continue
}
_ = r.Close()
var it api.IssueTemplate
content, err := markdown.ExtractMetadata(string(data), &it)
if err != nil {
Expand Down
55 changes: 46 additions & 9 deletions modules/git/batch_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,44 @@ import (
"strings"
)

// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
type WriteCloserError interface {
io.WriteCloser
CloseWithError(err error) error
}

// CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
batchStdinReader, batchStdinWriter := io.Pipe()
batchStdoutReader, batchStdoutWriter := io.Pipe()
cancel := func() {
_ = batchStdinReader.Close()
_ = batchStdinWriter.Close()
_ = batchStdoutReader.Close()
_ = batchStdoutWriter.Close()
}

go func() {
stderr := strings.Builder{}
err := NewCommand("cat-file", "--batch-check").RunInDirFullPipeline(repoPath, batchStdoutWriter, &stderr, batchStdinReader)
if err != nil {
_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
} else {
_ = batchStdoutWriter.Close()
_ = batchStdinReader.Close()
}
}()

// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
batchReader := bufio.NewReader(batchStdoutReader)

return batchStdinWriter, batchReader, cancel
}

// CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
func CatFileBatch(repoPath string) (*io.PipeWriter, *bufio.Reader, func()) {
// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout
batchStdinReader, batchStdinWriter := io.Pipe()
batchStdoutReader, batchStdoutWriter := io.Pipe()
Expand Down Expand Up @@ -47,26 +82,28 @@ func CatFileBatch(repoPath string) (*io.PipeWriter, *bufio.Reader, func()) {
// ReadBatchLine reads the header line from cat-file --batch
// We expect:
// <sha> SP <type> SP <size> LF
// sha is a 40byte not 20byte here
func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
sha, err = rd.ReadBytes(' ')
if err != nil {
return
}
sha = sha[:len(sha)-1]

typ, err = rd.ReadString(' ')
typ, err = rd.ReadString('\n')
if err != nil {
return
}
typ = typ[:len(typ)-1]

var sizeStr string
sizeStr, err = rd.ReadString('\n')
if err != nil {
idx := strings.Index(typ, " ")
if idx < 0 {
err = ErrNotExist{ID: string(sha)}
return
}
sizeStr := typ[idx+1 : len(typ)-1]
typ = typ[:idx]

size, err = strconv.ParseInt(sizeStr[:len(sizeStr)-1], 10, 64)
size, err = strconv.ParseInt(sizeStr, 10, 64)
return
}

Expand Down Expand Up @@ -128,7 +165,7 @@ headerLoop:
}

// Discard the rest of the commit
discard := size - n
discard := size - n + 1
for discard > math.MaxInt32 {
_, err := rd.Discard(math.MaxInt32)
if err != nil {
Expand Down
114 changes: 84 additions & 30 deletions modules/git/blob_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,48 +8,54 @@ package git

import (
"bufio"
"bytes"
"io"
"strconv"
"strings"
"io/ioutil"
"math"
)

// Blob represents a Git object.
type Blob struct {
ID SHA1

gotSize bool
size int64
repoPath string
name string
gotSize bool
size int64
name string
repo *Repository
}

// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output.
func (b *Blob) DataAsync() (io.ReadCloser, error) {
stdoutReader, stdoutWriter := io.Pipe()
wr, rd, cancel := b.repo.CatFileBatch()

go func() {
stderr := &strings.Builder{}
err := NewCommand("cat-file", "--batch").RunInDirFullPipeline(b.repoPath, stdoutWriter, stderr, strings.NewReader(b.ID.String()+"\n"))
if err != nil {
err = ConcatenateError(err, stderr.String())
_ = stdoutWriter.CloseWithError(err)
} else {
_ = stdoutWriter.Close()
}
}()

bufReader := bufio.NewReader(stdoutReader)
_, _, size, err := ReadBatchLine(bufReader)
_, err := wr.Write([]byte(b.ID.String() + "\n"))
if err != nil {
stdoutReader.Close()
cancel()
return nil, err
}
_, _, size, err := ReadBatchLine(rd)
if err != nil {
cancel()
return nil, err
}
b.gotSize = true
b.size = size

return &LimitedReaderCloser{
R: bufReader,
C: stdoutReader,
N: size,
if size < 4096 {
bs, err := ioutil.ReadAll(io.LimitReader(rd, size))
if err != nil {
cancel()
return nil, err
}
_, err = rd.Discard(1)
return ioutil.NopCloser(bytes.NewReader(bs)), err
}

return &blobReader{
rd: rd,
n: size,
cancel: cancel,
}, nil
}

Expand All @@ -59,18 +65,66 @@ func (b *Blob) Size() int64 {
return b.size
}

size, err := NewCommand("cat-file", "-s", b.ID.String()).RunInDir(b.repoPath)
wr, rd, cancel := b.repo.CatFileBatchCheck()
defer cancel()
_, err := wr.Write([]byte(b.ID.String() + "\n"))
if err != nil {
log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repoPath, err)
log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}

b.size, err = strconv.ParseInt(size[:len(size)-1], 10, 64)
_, _, b.size, err = ReadBatchLine(rd)
if err != nil {
log("error whilst parsing size %s for %s in %s. Error: %v", size, b.ID.String(), b.repoPath, err)
log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
return 0
}

b.gotSize = true

return b.size
}

type blobReader struct {
rd *bufio.Reader
n int64
cancel func()
}

func (b *blobReader) Read(p []byte) (n int, err error) {
if b.n <= 0 {
return 0, io.EOF
}
if int64(len(p)) > b.n {
p = p[0:b.n]
}
n, err = b.rd.Read(p)
b.n -= int64(n)
return
}

// Close implements io.Closer
func (b *blobReader) Close() error {
if b.n > 0 {
for b.n > math.MaxInt32 {
n, err := b.rd.Discard(math.MaxInt32)
b.n -= int64(n)
if err != nil {
b.cancel()
return err
}
b.n -= math.MaxInt32
}
n, err := b.rd.Discard(int(b.n))
b.n -= int64(n)
if err != nil {
b.cancel()
return err
}
}
if b.n == 0 {
_, err := b.rd.Discard(1)
b.n--
b.cancel()
return err
}
return nil
}
5 changes: 3 additions & 2 deletions modules/git/blob_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ func TestBlob_Data(t *testing.T) {
r, err := testBlob.DataAsync()
assert.NoError(t, err)
require.NotNil(t, r)
defer r.Close()

data, err := ioutil.ReadAll(r)
assert.NoError(t, r.Close())

assert.NoError(t, err)
assert.Equal(t, output, string(data))
}
Expand All @@ -54,7 +55,7 @@ func Benchmark_Blob_Data(b *testing.B) {
if err != nil {
b.Fatal(err)
}
defer r.Close()
ioutil.ReadAll(r)
_ = r.Close()
}
}
Loading