From 705e2567ad180d5e4944df5babe4de866a1aa61b Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 18 Feb 2021 15:46:50 +0800 Subject: [PATCH 01/25] Use storage to store archive files --- modules/context/context.go | 15 +++++++ modules/setting/repository.go | 7 +++ modules/storage/storage.go | 15 ++++++- routers/web/repo/repo.go | 51 +++++++++++++++++++++- services/archiver/archiver.go | 80 ++++++++++++----------------------- 5 files changed, 112 insertions(+), 56 deletions(-) diff --git a/modules/context/context.go b/modules/context/context.go index 7b3fd2899acd..64f8b1208457 100644 --- a/modules/context/context.go +++ b/modules/context/context.go @@ -380,6 +380,21 @@ func (ctx *Context) ServeFile(file string, names ...string) { http.ServeFile(ctx.Resp, ctx.Req, file) } +// ServeStream serves file via io stream +func (ctx *Context) ServeStream(rd io.Reader, name string) { + ctx.Resp.Header().Set("Content-Description", "File Transfer") + ctx.Resp.Header().Set("Content-Type", "application/octet-stream") + ctx.Resp.Header().Set("Content-Disposition", "attachment; filename="+name) + ctx.Resp.Header().Set("Content-Transfer-Encoding", "binary") + ctx.Resp.Header().Set("Expires", "0") + ctx.Resp.Header().Set("Cache-Control", "must-revalidate") + ctx.Resp.Header().Set("Pragma", "public") + _, err := io.Copy(ctx.Resp, rd) + if err != nil { + ctx.ServerError("Download file failed", err) + } +} + // Error returned an error to web browser func (ctx *Context) Error(status int, contents ...string) { var v = http.StatusText(status) diff --git a/modules/setting/repository.go b/modules/setting/repository.go index a7666895e1f3..6729df289605 100644 --- a/modules/setting/repository.go +++ b/modules/setting/repository.go @@ -251,6 +251,10 @@ var ( } RepoRootPath string ScriptType = "bash" + + RepoArchive = struct { + Storage + }{} ) func newRepository() { @@ -328,4 +332,7 @@ func newRepository() { if !filepath.IsAbs(Repository.Upload.TempPath) { Repository.Upload.TempPath = path.Join(AppWorkPath, Repository.Upload.TempPath) } + + repoAvatarSec := Cfg.Section("repository.archives") + RepoArchive.Storage = getStorage("repo-archive", "", repoAvatarSec) } diff --git a/modules/storage/storage.go b/modules/storage/storage.go index 984f154db48b..b3708908f818 100644 --- a/modules/storage/storage.go +++ b/modules/storage/storage.go @@ -114,6 +114,9 @@ var ( Avatars ObjectStorage // RepoAvatars represents repository avatars storage RepoAvatars ObjectStorage + + // RepoArchives represents repository archives storage + RepoArchives ObjectStorage ) // Init init the stoarge @@ -130,7 +133,11 @@ func Init() error { return err } - return initLFS() + if err := initLFS(); err != nil { + return err + } + + return initRepoArchives() } // NewStorage takes a storage type and some config and returns an ObjectStorage or an error @@ -169,3 +176,9 @@ func initRepoAvatars() (err error) { RepoAvatars, err = NewStorage(setting.RepoAvatar.Storage.Type, &setting.RepoAvatar.Storage) return } + +func initRepoArchives() (err error) { + log.Info("Initialising Repository Archive storage with type: %s", setting.RepoArchive.Storage.Type) + RepoArchives, err = NewStorage(setting.RepoArchive.Storage.Type, &setting.RepoArchive.Storage) + return +} diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go index f149e92a8b6b..9efe8c7c6b76 100644 --- a/routers/web/repo/repo.go +++ b/routers/web/repo/repo.go @@ -17,6 +17,7 @@ import ( "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/storage" "code.gitea.io/gitea/modules/web" archiver_service "code.gitea.io/gitea/services/archiver" "code.gitea.io/gitea/services/forms" @@ -364,13 +365,59 @@ func RedirectDownload(ctx *context.Context) { ctx.Error(http.StatusNotFound) } +// Download an archive of a repository +func Download(ctx *context.Context) { + uri := ctx.Params("*") + aReq, err := archiver_service.NewRequest(ctx.Repo.GitRepo, uri) + if err != nil { + ctx.ServerError("archiver_service.NewRequest", err) + return + } + if aReq == nil { + ctx.Error(http.StatusNotFound) + return + } + + downloadName := ctx.Repo.Repository.Name + "-" + aReq.GetArchiveName() + complete := aReq.IsComplete() + if !complete { + aReq = archiver_service.ArchiveRepository(aReq) + complete = aReq.WaitForCompletion(ctx) + } + + if complete { + if setting.RepoArchive.ServeDirect { + //If we have a signed url (S3, object storage), redirect to this directly. + u, err := storage.RepoArchives.URL(aReq.GetArchivePath(), downloadName) + if u != nil && err == nil { + ctx.Redirect(u.String()) + return + } + } + + //If we have matched and access to release or issue + fr, err := storage.RepoArchives.Open(aReq.GetArchivePath()) + if err != nil { + ctx.ServerError("Open", err) + return + } + defer fr.Close() + ctx.ServeStream(fr, downloadName) + } else { + ctx.Error(http.StatusNotFound) + } +} + // InitiateDownload will enqueue an archival request, as needed. It may submit // a request that's already in-progress, but the archiver service will just // kind of drop it on the floor if this is the case. func InitiateDownload(ctx *context.Context) { uri := ctx.Params("*") - aReq := archiver_service.DeriveRequestFrom(ctx, uri) - + aReq, err := archiver_service.NewRequest(ctx.Repo.GitRepo, uri) + if err != nil { + ctx.ServerError("archiver_service.NewRequest", err) + return + } if aReq == nil { ctx.Error(http.StatusNotFound) return diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go index dfa6334d9536..dc751b43cbab 100644 --- a/services/archiver/archiver.go +++ b/services/archiver/archiver.go @@ -6,7 +6,8 @@ package archiver import ( - "io" + "errors" + "fmt" "io/ioutil" "os" "path" @@ -21,7 +22,7 @@ import ( "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/util" + "code.gitea.io/gitea/modules/storage" ) // ArchiveRequest defines the parameters of an archive request, which notably @@ -109,17 +110,13 @@ func getArchiveRequest(repo *git.Repository, commit *git.Commit, archiveType git return nil } -// DeriveRequestFrom creates an archival request, based on the URI. The +// NewRequest creates an archival request, based on the URI. The // resulting ArchiveRequest is suitable for being passed to ArchiveRepository() // if it's determined that the request still needs to be satisfied. -func DeriveRequestFrom(ctx *context.Context, uri string) *ArchiveRequest { - if ctx.Repo == nil || ctx.Repo.GitRepo == nil { - log.Trace("Repo not initialized") - return nil - } +func NewRequest(repo *git.Repository, uri string) (*ArchiveRequest, error) { r := &ArchiveRequest{ uri: uri, - repo: ctx.Repo.GitRepo, + repo: repo, } switch { @@ -132,69 +129,46 @@ func DeriveRequestFrom(ctx *context.Context, uri string) *ArchiveRequest { r.archivePath = path.Join(r.repo.Path, "archives/targz") r.archiveType = git.TARGZ default: - log.Trace("Unknown format: %s", uri) - return nil + return nil, fmt.Errorf("Unknown format: %s", uri) } r.refName = strings.TrimSuffix(r.uri, r.ext) - isDir, err := util.IsDir(r.archivePath) - if err != nil { - ctx.ServerError("Download -> util.IsDir(archivePath)", err) - return nil - } - if !isDir { - if err := os.MkdirAll(r.archivePath, os.ModePerm); err != nil { - ctx.ServerError("Download -> os.MkdirAll(archivePath)", err) - return nil - } - } + var err error // Get corresponding commit. if r.repo.IsBranchExist(r.refName) { r.commit, err = r.repo.GetBranchCommit(r.refName) if err != nil { - ctx.ServerError("GetBranchCommit", err) - return nil + return nil, err } } else if r.repo.IsTagExist(r.refName) { r.commit, err = r.repo.GetTagCommit(r.refName) if err != nil { - ctx.ServerError("GetTagCommit", err) - return nil + return nil, err } } else if shaRegex.MatchString(r.refName) { r.commit, err = r.repo.GetCommit(r.refName) if err != nil { - ctx.NotFound("GetCommit", nil) - return nil + return nil, err } } else { - ctx.NotFound("DeriveRequestFrom", nil) - return nil + return nil, fmt.Errorf("Unknow ref %s type", r.refName) } archiveMutex.Lock() defer archiveMutex.Unlock() if rExisting := getArchiveRequest(r.repo, r.commit, r.archiveType); rExisting != nil { - return rExisting + return rExisting, nil } r.archivePath = path.Join(r.archivePath, base.ShortSha(r.commit.ID.String())+r.ext) - r.archiveComplete, err = util.IsFile(r.archivePath) if err != nil { - ctx.ServerError("util.IsFile", err) - return nil + return nil, err } - return r + return r, nil } func doArchive(r *ArchiveRequest) { - var ( - err error - tmpArchive *os.File - destArchive *os.File - ) - // Close the channel to indicate to potential waiters that this request // has finished. defer close(r.cchan) @@ -203,19 +177,19 @@ func doArchive(r *ArchiveRequest) { // race conditions and difficulties in locking. Do one last check that // the archive we're referring to doesn't already exist. If it does exist, // then just mark the request as complete and move on. - isFile, err := util.IsFile(r.archivePath) - if err != nil { - log.Error("Unable to check if %s util.IsFile: %v. Will ignore and recreate.", r.archivePath, err) - } - if isFile { + _, err := storage.RepoArchives.Stat(r.archivePath) + if err == nil { r.archiveComplete = true return } + if !errors.Is(err, os.ErrNotExist) { + log.Error("Unable to check if %s util.IsFile: %v. Will ignore and recreate.", r.archivePath, err) + } // Create a temporary file to use while the archive is being built. We // will then copy it into place (r.archivePath) once it's fully // constructed. - tmpArchive, err = ioutil.TempFile("", "archive") + tmpArchive, err := ioutil.TempFile("", "archive") if err != nil { log.Error("Unable to create a temporary archive file! Error: %v", err) return @@ -233,14 +207,14 @@ func doArchive(r *ArchiveRequest) { return } - // Now we copy it into place - if destArchive, err = os.Create(r.archivePath); err != nil { - log.Error("Unable to open archive " + r.archivePath) + f, err := os.Open(tmpArchive.Name()) + if err != nil { + log.Error("Unable to open temp archive " + tmpArchive.Name()) return } - _, err = io.Copy(destArchive, tmpArchive) - destArchive.Close() - if err != nil { + defer f.Close() + + if _, err := storage.RepoArchives.Save(r.archivePath, f); err != nil { log.Error("Unable to write archive " + r.archivePath) return } From d8fe2b30c462d84c88aab366c6597661034c3a46 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 18 Mar 2021 23:12:05 +0800 Subject: [PATCH 02/25] Fix backend lint --- services/archiver/archiver_test.go | 33 ++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/services/archiver/archiver_test.go b/services/archiver/archiver_test.go index 6dcd942bf5ef..8b9cff99bac1 100644 --- a/services/archiver/archiver_test.go +++ b/services/archiver/archiver_test.go @@ -77,11 +77,13 @@ func TestArchive_Basic(t *testing.T) { ctx := test.MockContext(t, "user27/repo49") firstCommit, secondCommit := "51f84af23134", "aacbdfe9e1c4" - bogusReq := DeriveRequestFrom(ctx, firstCommit+".zip") + bogusReq, err := NewRequest(ctx.Repo.GitRepo, firstCommit+".zip") + assert.NoError(t, err) assert.Nil(t, bogusReq) test.LoadRepo(t, ctx, 49) - bogusReq = DeriveRequestFrom(ctx, firstCommit+".zip") + bogusReq, err = NewRequest(ctx.Repo.GitRepo, firstCommit+".zip") + assert.NoError(t, err) assert.Nil(t, bogusReq) test.LoadGitRepo(t, ctx) @@ -89,25 +91,31 @@ func TestArchive_Basic(t *testing.T) { // Check a series of bogus requests. // Step 1, valid commit with a bad extension. - bogusReq = DeriveRequestFrom(ctx, firstCommit+".dilbert") + bogusReq, err = NewRequest(ctx.Repo.GitRepo, firstCommit+".dilbert") + assert.NoError(t, err) assert.Nil(t, bogusReq) // Step 2, missing commit. - bogusReq = DeriveRequestFrom(ctx, "dbffff.zip") + bogusReq, err = NewRequest(ctx.Repo.GitRepo, "dbffff.zip") + assert.NoError(t, err) assert.Nil(t, bogusReq) // Step 3, doesn't look like branch/tag/commit. - bogusReq = DeriveRequestFrom(ctx, "db.zip") + bogusReq, err = NewRequest(ctx.Repo.GitRepo, "db.zip") + assert.NoError(t, err) assert.Nil(t, bogusReq) // Now two valid requests, firstCommit with valid extensions. - zipReq := DeriveRequestFrom(ctx, firstCommit+".zip") + zipReq, err := NewRequest(ctx.Repo.GitRepo, firstCommit+".zip") + assert.NoError(t, err) assert.NotNil(t, zipReq) - tgzReq := DeriveRequestFrom(ctx, firstCommit+".tar.gz") + tgzReq, err := NewRequest(ctx.Repo.GitRepo, firstCommit+".tar.gz") + assert.NoError(t, err) assert.NotNil(t, tgzReq) - secondReq := DeriveRequestFrom(ctx, secondCommit+".zip") + secondReq, err := NewRequest(ctx.Repo.GitRepo, secondCommit+".zip") + assert.NoError(t, err) assert.NotNil(t, secondReq) inFlight := make([]*ArchiveRequest, 3) @@ -162,7 +170,8 @@ func TestArchive_Basic(t *testing.T) { // Do so now. assert.Len(t, archiveInProgress, 3) - zipReq2 := DeriveRequestFrom(ctx, firstCommit+".zip") + zipReq2, err := NewRequest(ctx.Repo.GitRepo, firstCommit+".zip") + assert.NoError(t, err) // This zipReq should match what's sitting in the queue, as we haven't // let it release yet. From the consumer's point of view, this looks like // a long-running archive task. @@ -188,7 +197,8 @@ func TestArchive_Basic(t *testing.T) { // after we release it. We should trigger both the timeout and non-timeout // cases. var completed, timedout bool - timedReq := DeriveRequestFrom(ctx, secondCommit+".tar.gz") + timedReq, err := NewRequest(ctx.Repo.GitRepo, secondCommit+".tar.gz") + assert.NoError(t, err) assert.NotNil(t, timedReq) ArchiveRepository(timedReq) @@ -206,7 +216,8 @@ func TestArchive_Basic(t *testing.T) { assert.True(t, completed) assert.False(t, timedout) - zipReq2 = DeriveRequestFrom(ctx, firstCommit+".zip") + zipReq2, err = NewRequest(ctx.Repo.GitRepo, firstCommit+".zip") + assert.NoError(t, err) // Now, we're guaranteed to have released the original zipReq from the queue. // Ensure that we don't get handed back the released entry somehow, but they // should remain functionally equivalent in all fields. The exception here From a98990fcd136e5a9335a00933941307ffbd1c24e Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sat, 20 Mar 2021 09:39:03 +0800 Subject: [PATCH 03/25] Add archiver table on database --- models/repo_archiver.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 models/repo_archiver.go diff --git a/models/repo_archiver.go b/models/repo_archiver.go new file mode 100644 index 000000000000..76df489b3ff4 --- /dev/null +++ b/models/repo_archiver.go @@ -0,0 +1,21 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package models + +import ( + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/timeutil" +) + +// RepoArchiver represents all archivers +type RepoArchiver struct { + ID int64 `xorm:"pk autoincr"` + RepoID int64 + Type git.ArchiveType + RefName string + Name string + CreatedUnix timeutil.TimeStamp `xorm:"INDEX NOT NULL created"` + UpdatedUnix timeutil.TimeStamp `xorm:"INDEX NOT NULL updated"` +} From 23f057789a7cbd5d237e8f8898acdd8ede25472e Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Fri, 16 Apr 2021 22:36:23 +0800 Subject: [PATCH 04/25] Finish archive download --- models/repo_archiver.go | 28 +- .../{commit_archive.go => repo_archive.go} | 31 +-- routers/web/repo/repo.go | 57 ++-- services/archiver/archiver.go | 252 +++++------------- 4 files changed, 125 insertions(+), 243 deletions(-) rename modules/git/{commit_archive.go => repo_archive.go} (60%) diff --git a/models/repo_archiver.go b/models/repo_archiver.go index 76df489b3ff4..73b8fbb80d53 100644 --- a/models/repo_archiver.go +++ b/models/repo_archiver.go @@ -11,11 +11,29 @@ import ( // RepoArchiver represents all archivers type RepoArchiver struct { - ID int64 `xorm:"pk autoincr"` - RepoID int64 - Type git.ArchiveType - RefName string + ID int64 `xorm:"pk autoincr"` + RepoID int64 `xorm:"index unique(s)"` + Type git.ArchiveType `xorm:"unique(s)"` + CommitID string `xorm:"VARCHAR(40) unique(s)"` Name string CreatedUnix timeutil.TimeStamp `xorm:"INDEX NOT NULL created"` - UpdatedUnix timeutil.TimeStamp `xorm:"INDEX NOT NULL updated"` +} + +// GetRepoArchiver get an archiver +func GetRepoArchiver(ctx DBContext, repoID int64, tp git.ArchiveType, commitID string) (*RepoArchiver, error) { + var archiver RepoArchiver + has, err := ctx.e.Where("repo_id=?", repoID).And("`type`=?", tp).And("commit_id=?", commitID).Get(&archiver) + if err != nil { + return nil, err + } + if has { + return &archiver, nil + } + return nil, nil +} + +// AddArchiver adds an archiver +func AddArchiver(ctx DBContext, archiver *RepoArchiver) error { + _, err := ctx.e.Insert(archiver) + return err } diff --git a/modules/git/commit_archive.go b/modules/git/repo_archive.go similarity index 60% rename from modules/git/commit_archive.go rename to modules/git/repo_archive.go index d075ba09115f..07003aa6b2c1 100644 --- a/modules/git/commit_archive.go +++ b/modules/git/repo_archive.go @@ -8,6 +8,7 @@ package git import ( "context" "fmt" + "io" "path/filepath" "strings" ) @@ -33,32 +34,28 @@ func (a ArchiveType) String() string { return "unknown" } -// CreateArchiveOpts represents options for creating an archive -type CreateArchiveOpts struct { - Format ArchiveType - Prefix bool -} - // CreateArchive create archive content to the target path -func (c *Commit) CreateArchive(ctx context.Context, target string, opts CreateArchiveOpts) error { - if opts.Format.String() == "unknown" { - return fmt.Errorf("unknown format: %v", opts.Format) +func (repo *Repository) CreateArchive(ctx context.Context, format ArchiveType, target io.Writer, usePrefix bool, commitID string) error { + if format.String() == "unknown" { + return fmt.Errorf("unknown format: %v", format) } args := []string{ "archive", } - if opts.Prefix { - args = append(args, "--prefix="+filepath.Base(strings.TrimSuffix(c.repo.Path, ".git"))+"/") + if usePrefix { + args = append(args, "--prefix="+filepath.Base(strings.TrimSuffix(repo.Path, ".git"))+"/") } args = append(args, - "--format="+opts.Format.String(), - "-o", - target, - c.ID.String(), + "--format="+format.String(), + commitID, ) - _, err := NewCommandContext(ctx, args...).RunInDir(c.repo.Path) - return err + var stderr strings.Builder + err := NewCommandContext(ctx, args...).RunInDirPipeline(repo.Path, target, &stderr) + if err != nil { + return ConcatenateError(err, stderr.String()) + } + return nil } diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go index 9efe8c7c6b76..d16332b68a14 100644 --- a/routers/web/repo/repo.go +++ b/routers/web/repo/repo.go @@ -10,7 +10,6 @@ import ( "fmt" "net/http" "strings" - "time" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" @@ -368,7 +367,7 @@ func RedirectDownload(ctx *context.Context) { // Download an archive of a repository func Download(ctx *context.Context) { uri := ctx.Params("*") - aReq, err := archiver_service.NewRequest(ctx.Repo.GitRepo, uri) + aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.RepoID, ctx.Repo.GitRepo, uri) if err != nil { ctx.ServerError("archiver_service.NewRequest", err) return @@ -379,33 +378,29 @@ func Download(ctx *context.Context) { } downloadName := ctx.Repo.Repository.Name + "-" + aReq.GetArchiveName() - complete := aReq.IsComplete() - if !complete { - aReq = archiver_service.ArchiveRepository(aReq) - complete = aReq.WaitForCompletion(ctx) - } - - if complete { - if setting.RepoArchive.ServeDirect { - //If we have a signed url (S3, object storage), redirect to this directly. - u, err := storage.RepoArchives.URL(aReq.GetArchivePath(), downloadName) - if u != nil && err == nil { - ctx.Redirect(u.String()) - return - } - } - //If we have matched and access to release or issue - fr, err := storage.RepoArchives.Open(aReq.GetArchivePath()) - if err != nil { - ctx.ServerError("Open", err) + if err := archiver_service.ArchiveRepository(aReq); err != nil { + ctx.ServerError("ArchiveRepository", err) + return + } + + if setting.RepoArchive.ServeDirect { + //If we have a signed url (S3, object storage), redirect to this directly. + u, err := storage.RepoArchives.URL(aReq.GetArchivePath(), downloadName) + if u != nil && err == nil { + ctx.Redirect(u.String()) return } - defer fr.Close() - ctx.ServeStream(fr, downloadName) - } else { - ctx.Error(http.StatusNotFound) } + + //If we have matched and access to release or issue + fr, err := storage.RepoArchives.Open(aReq.GetArchivePath()) + if err != nil { + ctx.ServerError("Open", err) + return + } + defer fr.Close() + ctx.ServeStream(fr, downloadName) } // InitiateDownload will enqueue an archival request, as needed. It may submit @@ -413,7 +408,7 @@ func Download(ctx *context.Context) { // kind of drop it on the floor if this is the case. func InitiateDownload(ctx *context.Context) { uri := ctx.Params("*") - aReq, err := archiver_service.NewRequest(ctx.Repo.GitRepo, uri) + aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri) if err != nil { ctx.ServerError("archiver_service.NewRequest", err) return @@ -423,13 +418,13 @@ func InitiateDownload(ctx *context.Context) { return } - complete := aReq.IsComplete() - if !complete { - aReq = archiver_service.ArchiveRepository(aReq) - complete, _ = aReq.TimedWaitForCompletion(ctx, 2*time.Second) + err = archiver_service.ArchiveRepository(aReq) + if err != nil { + ctx.ServerError("archiver_service.ArchiveRepository", err) + return } ctx.JSON(http.StatusOK, map[string]interface{}{ - "complete": complete, + "complete": true, }) } diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go index dc751b43cbab..3ae1e2465b2d 100644 --- a/services/archiver/archiver.go +++ b/services/archiver/archiver.go @@ -6,21 +6,16 @@ package archiver import ( - "errors" "fmt" - "io/ioutil" - "os" + "io" "path" "regexp" "strings" - "sync" - "time" + "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" - "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/graceful" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" ) @@ -31,92 +26,28 @@ import ( // This is entirely opaque to external entities, though, and mostly used as a // handle elsewhere. type ArchiveRequest struct { - uri string - repo *git.Repository - refName string - ext string - archivePath string - archiveType git.ArchiveType - archiveComplete bool - commit *git.Commit - cchan chan struct{} + uri string + repoID int64 + repo *git.Repository + refName string + ext string + archivePath string + archiveType git.ArchiveType + commit *git.Commit } -var archiveInProgress []*ArchiveRequest -var archiveMutex sync.Mutex - // SHA1 hashes will only go up to 40 characters, but SHA256 hashes will go all // the way to 64. var shaRegex = regexp.MustCompile(`^[0-9a-f]{4,64}$`) -// These facilitate testing, by allowing the unit tests to control (to some extent) -// the goroutine used for processing the queue. -var archiveQueueMutex *sync.Mutex -var archiveQueueStartCond *sync.Cond -var archiveQueueReleaseCond *sync.Cond - -// GetArchivePath returns the path from which we can serve this archive. -func (aReq *ArchiveRequest) GetArchivePath() string { - return aReq.archivePath -} - -// GetArchiveName returns the name of the caller, based on the ref used by the -// caller to create this request. -func (aReq *ArchiveRequest) GetArchiveName() string { - return aReq.refName + aReq.ext -} - -// IsComplete returns the completion status of this request. -func (aReq *ArchiveRequest) IsComplete() bool { - return aReq.archiveComplete -} - -// WaitForCompletion will wait for this request to complete, with no timeout. -// It returns whether the archive was actually completed, as the channel could -// have also been closed due to an error. -func (aReq *ArchiveRequest) WaitForCompletion(ctx *context.Context) bool { - select { - case <-aReq.cchan: - case <-ctx.Done(): - } - - return aReq.IsComplete() -} - -// TimedWaitForCompletion will wait for this request to complete, with timeout -// happening after the specified Duration. It returns whether the archive is -// now complete and whether we hit the timeout or not. The latter may not be -// useful if the request is complete or we started to shutdown. -func (aReq *ArchiveRequest) TimedWaitForCompletion(ctx *context.Context, dur time.Duration) (bool, bool) { - timeout := false - select { - case <-time.After(dur): - timeout = true - case <-aReq.cchan: - case <-ctx.Done(): - } - - return aReq.IsComplete(), timeout -} - -// The caller must hold the archiveMutex across calls to getArchiveRequest. -func getArchiveRequest(repo *git.Repository, commit *git.Commit, archiveType git.ArchiveType) *ArchiveRequest { - for _, r := range archiveInProgress { - // Need to be referring to the same repository. - if r.repo.Path == repo.Path && r.commit.ID == commit.ID && r.archiveType == archiveType { - return r - } - } - return nil -} - // NewRequest creates an archival request, based on the URI. The // resulting ArchiveRequest is suitable for being passed to ArchiveRepository() // if it's determined that the request still needs to be satisfied. -func NewRequest(repo *git.Repository, uri string) (*ArchiveRequest, error) { +func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) { r := &ArchiveRequest{ - uri: uri, - repo: repo, + repoID: repoID, + uri: uri, + repo: repo, } switch { @@ -155,12 +86,6 @@ func NewRequest(repo *git.Repository, uri string) (*ArchiveRequest, error) { return nil, fmt.Errorf("Unknow ref %s type", r.refName) } - archiveMutex.Lock() - defer archiveMutex.Unlock() - if rExisting := getArchiveRequest(r.repo, r.commit, r.archiveType); rExisting != nil { - return rExisting, nil - } - r.archivePath = path.Join(r.archivePath, base.ShortSha(r.commit.ID.String())+r.ext) if err != nil { return nil, err @@ -168,59 +93,65 @@ func NewRequest(repo *git.Repository, uri string) (*ArchiveRequest, error) { return r, nil } -func doArchive(r *ArchiveRequest) { - // Close the channel to indicate to potential waiters that this request - // has finished. - defer close(r.cchan) +// GetArchivePath returns the path from which we can serve this archive. +func (aReq *ArchiveRequest) GetArchivePath() string { + return aReq.archivePath +} - // It could have happened that we enqueued two archival requests, due to - // race conditions and difficulties in locking. Do one last check that - // the archive we're referring to doesn't already exist. If it does exist, - // then just mark the request as complete and move on. - _, err := storage.RepoArchives.Stat(r.archivePath) - if err == nil { - r.archiveComplete = true - return - } - if !errors.Is(err, os.ErrNotExist) { - log.Error("Unable to check if %s util.IsFile: %v. Will ignore and recreate.", r.archivePath, err) +// GetArchiveName returns the name of the caller, based on the ref used by the +// caller to create this request. +func (aReq *ArchiveRequest) GetArchiveName() string { + return aReq.refName + aReq.ext +} + +func doArchive(r *ArchiveRequest) error { + ctx, commiter, err := models.TxDBContext() + if err != nil { + return err } + defer commiter.Close() - // Create a temporary file to use while the archive is being built. We - // will then copy it into place (r.archivePath) once it's fully - // constructed. - tmpArchive, err := ioutil.TempFile("", "archive") + archiver, err := models.GetRepoArchiver(ctx, r.repoID, r.archiveType, r.commit.ID.String()) if err != nil { - log.Error("Unable to create a temporary archive file! Error: %v", err) - return + return err + } + if archiver != nil { + return nil } - defer func() { - tmpArchive.Close() - os.Remove(tmpArchive.Name()) - }() - if err = r.commit.CreateArchive(graceful.GetManager().ShutdownContext(), tmpArchive.Name(), git.CreateArchiveOpts{ - Format: r.archiveType, - Prefix: setting.Repository.PrefixArchiveFiles, + if err := models.AddArchiver(ctx, &models.RepoArchiver{ + RepoID: r.repoID, + Type: r.archiveType, + CommitID: r.commit.ID.String(), + Name: r.GetArchiveName(), }); err != nil { - log.Error("Download -> CreateArchive "+tmpArchive.Name(), err) - return + return err } - f, err := os.Open(tmpArchive.Name()) - if err != nil { - log.Error("Unable to open temp archive " + tmpArchive.Name()) - return + rd, w := io.Pipe() + var done chan error + + go func(done chan error, w io.Writer) { + err := r.repo.CreateArchive( + graceful.GetManager().ShutdownContext(), + r.archiveType, + w, + setting.Repository.PrefixArchiveFiles, + r.commit.ID.String(), + ) + done <- err + }(done, w) + + if _, err := storage.RepoArchives.Save(r.archivePath, rd, -1); err != nil { + return fmt.Errorf("Unable to write archive: %v", err) } - defer f.Close() - if _, err := storage.RepoArchives.Save(r.archivePath, f); err != nil { - log.Error("Unable to write archive " + r.archivePath) - return + err = <-done + if err != nil { + return err } - // Block any attempt to finalize creating a new request if we're marking - r.archiveComplete = true + return commiter.Commit() } // ArchiveRepository satisfies the ArchiveRequest being passed in. Processing @@ -229,65 +160,6 @@ func doArchive(r *ArchiveRequest) { // anything. In all cases, the caller should be examining the *ArchiveRequest // being returned for completion, as it may be different than the one they passed // in. -func ArchiveRepository(request *ArchiveRequest) *ArchiveRequest { - // We'll return the request that's already been enqueued if it has been - // enqueued, or we'll immediately enqueue it if it has not been enqueued - // and it is not marked complete. - archiveMutex.Lock() - defer archiveMutex.Unlock() - if rExisting := getArchiveRequest(request.repo, request.commit, request.archiveType); rExisting != nil { - return rExisting - } - if request.archiveComplete { - return request - } - - request.cchan = make(chan struct{}) - archiveInProgress = append(archiveInProgress, request) - go func() { - // Wait to start, if we have the Cond for it. This is currently only - // useful for testing, so that the start and release of queued entries - // can be controlled to examine the queue. - if archiveQueueStartCond != nil { - archiveQueueMutex.Lock() - archiveQueueStartCond.Wait() - archiveQueueMutex.Unlock() - } - - // Drop the mutex while we process the request. This may take a long - // time, and it's not necessary now that we've added the reequest to - // archiveInProgress. - doArchive(request) - - if archiveQueueReleaseCond != nil { - archiveQueueMutex.Lock() - archiveQueueReleaseCond.Wait() - archiveQueueMutex.Unlock() - } - - // Purge this request from the list. To do so, we'll just take the - // index at which we ended up at and swap the final element into that - // position, then chop off the now-redundant final element. The slice - // may have change in between these two segments and we may have moved, - // so we search for it here. We could perhaps avoid this search - // entirely if len(archiveInProgress) == 1, but we should verify - // correctness. - archiveMutex.Lock() - defer archiveMutex.Unlock() - - idx := -1 - for _idx, req := range archiveInProgress { - if req == request { - idx = _idx - break - } - } - if idx == -1 { - log.Error("ArchiveRepository: Failed to find request for removal.") - return - } - archiveInProgress = append(archiveInProgress[:idx], archiveInProgress[idx+1:]...) - }() - - return request +func ArchiveRepository(request *ArchiveRequest) error { + return doArchive(request) } From 75e2230c680c815c7a9b283572b553ec12aa57d7 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sat, 17 Apr 2021 15:19:49 +0800 Subject: [PATCH 05/25] Fix test --- services/archiver/archiver_test.go | 133 +++-------------------------- 1 file changed, 13 insertions(+), 120 deletions(-) diff --git a/services/archiver/archiver_test.go b/services/archiver/archiver_test.go index 8b9cff99bac1..f075cd379232 100644 --- a/services/archiver/archiver_test.go +++ b/services/archiver/archiver_test.go @@ -6,83 +6,35 @@ package archiver import ( "path/filepath" - "sync" "testing" "time" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/test" - "code.gitea.io/gitea/modules/util" "github.com/stretchr/testify/assert" ) -var queueMutex sync.Mutex - func TestMain(m *testing.M) { models.MainTest(m, filepath.Join("..", "..")) } func waitForCount(t *testing.T, num int) { - var numQueued int - - // Wait for up to 10 seconds for the queue to be impacted. - timeout := time.Now().Add(10 * time.Second) - for { - numQueued = len(archiveInProgress) - if numQueued == num || time.Now().After(timeout) { - break - } - } - - assert.Len(t, archiveInProgress, num) -} - -func releaseOneEntry(t *testing.T, inFlight []*ArchiveRequest) { - var nowQueued, numQueued int - - numQueued = len(archiveInProgress) - // Release one, then wait up to 10 seconds for it to complete. - queueMutex.Lock() - archiveQueueReleaseCond.Signal() - queueMutex.Unlock() - timeout := time.Now().Add(10 * time.Second) - for { - nowQueued = len(archiveInProgress) - if nowQueued != numQueued || time.Now().After(timeout) { - break - } - } - - // Make sure we didn't just timeout. - assert.NotEqual(t, numQueued, nowQueued) - - // Also make sure that we released only one. - assert.Equal(t, numQueued-1, nowQueued) } func TestArchive_Basic(t *testing.T) { assert.NoError(t, models.PrepareTestDatabase()) - archiveQueueMutex = &queueMutex - archiveQueueStartCond = sync.NewCond(&queueMutex) - archiveQueueReleaseCond = sync.NewCond(&queueMutex) - defer func() { - archiveQueueMutex = nil - archiveQueueStartCond = nil - archiveQueueReleaseCond = nil - }() - ctx := test.MockContext(t, "user27/repo49") firstCommit, secondCommit := "51f84af23134", "aacbdfe9e1c4" - bogusReq, err := NewRequest(ctx.Repo.GitRepo, firstCommit+".zip") + bogusReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") assert.NoError(t, err) assert.Nil(t, bogusReq) test.LoadRepo(t, ctx, 49) - bogusReq, err = NewRequest(ctx.Repo.GitRepo, firstCommit+".zip") + bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") assert.NoError(t, err) assert.Nil(t, bogusReq) @@ -91,30 +43,30 @@ func TestArchive_Basic(t *testing.T) { // Check a series of bogus requests. // Step 1, valid commit with a bad extension. - bogusReq, err = NewRequest(ctx.Repo.GitRepo, firstCommit+".dilbert") + bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".dilbert") assert.NoError(t, err) assert.Nil(t, bogusReq) // Step 2, missing commit. - bogusReq, err = NewRequest(ctx.Repo.GitRepo, "dbffff.zip") + bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "dbffff.zip") assert.NoError(t, err) assert.Nil(t, bogusReq) // Step 3, doesn't look like branch/tag/commit. - bogusReq, err = NewRequest(ctx.Repo.GitRepo, "db.zip") + bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "db.zip") assert.NoError(t, err) assert.Nil(t, bogusReq) // Now two valid requests, firstCommit with valid extensions. - zipReq, err := NewRequest(ctx.Repo.GitRepo, firstCommit+".zip") + zipReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") assert.NoError(t, err) assert.NotNil(t, zipReq) - tgzReq, err := NewRequest(ctx.Repo.GitRepo, firstCommit+".tar.gz") + tgzReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".tar.gz") assert.NoError(t, err) assert.NotNil(t, tgzReq) - secondReq, err := NewRequest(ctx.Repo.GitRepo, secondCommit+".zip") + secondReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit+".zip") assert.NoError(t, err) assert.NotNil(t, secondReq) @@ -136,41 +88,8 @@ func TestArchive_Basic(t *testing.T) { // Sleep two seconds to make sure the queue doesn't change. time.Sleep(2 * time.Second) - assert.Len(t, archiveInProgress, 3) - - // Release them all, they'll then stall at the archiveQueueReleaseCond while - // we examine the queue state. - queueMutex.Lock() - archiveQueueStartCond.Broadcast() - queueMutex.Unlock() - - // Iterate through all of the in-flight requests and wait for their - // completion. - for _, req := range inFlight { - req.WaitForCompletion(ctx) - } - - for _, req := range inFlight { - assert.True(t, req.IsComplete()) - exist, err := util.IsExist(req.GetArchivePath()) - assert.NoError(t, err) - assert.True(t, exist) - } - - arbitraryReq := inFlight[0] - // Reopen the channel so we don't double-close, mark it incomplete. We're - // going to run it back through the archiver, and it should get marked - // complete again. - arbitraryReq.cchan = make(chan struct{}) - arbitraryReq.archiveComplete = false - doArchive(arbitraryReq) - assert.True(t, arbitraryReq.IsComplete()) - - // Queues should not have drained yet, because we haven't released them. - // Do so now. - assert.Len(t, archiveInProgress, 3) - - zipReq2, err := NewRequest(ctx.Repo.GitRepo, firstCommit+".zip") + + zipReq2, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") assert.NoError(t, err) // This zipReq should match what's sitting in the queue, as we haven't // let it release yet. From the consumer's point of view, this looks like @@ -182,48 +101,22 @@ func TestArchive_Basic(t *testing.T) { // predecessor has cleared out of the queue. ArchiveRepository(zipReq2) - // Make sure the queue hasn't grown any. - assert.Len(t, archiveInProgress, 3) - - // Make sure the queue drains properly - releaseOneEntry(t, inFlight) - assert.Len(t, archiveInProgress, 2) - releaseOneEntry(t, inFlight) - assert.Len(t, archiveInProgress, 1) - releaseOneEntry(t, inFlight) - assert.Empty(t, archiveInProgress) - // Now we'll submit a request and TimedWaitForCompletion twice, before and // after we release it. We should trigger both the timeout and non-timeout // cases. - var completed, timedout bool - timedReq, err := NewRequest(ctx.Repo.GitRepo, secondCommit+".tar.gz") + timedReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit+".tar.gz") assert.NoError(t, err) assert.NotNil(t, timedReq) ArchiveRepository(timedReq) - // Guaranteed to timeout; we haven't signalled the request to start.. - completed, timedout = timedReq.TimedWaitForCompletion(ctx, 2*time.Second) - assert.False(t, completed) - assert.True(t, timedout) - - queueMutex.Lock() - archiveQueueStartCond.Broadcast() - queueMutex.Unlock() - - // Shouldn't timeout, we've now signalled it and it's a small request. - completed, timedout = timedReq.TimedWaitForCompletion(ctx, 15*time.Second) - assert.True(t, completed) - assert.False(t, timedout) - - zipReq2, err = NewRequest(ctx.Repo.GitRepo, firstCommit+".zip") + zipReq2, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") assert.NoError(t, err) // Now, we're guaranteed to have released the original zipReq from the queue. // Ensure that we don't get handed back the released entry somehow, but they // should remain functionally equivalent in all fields. The exception here // is zipReq.cchan, which will be non-nil because it's a completed request. // It's fine to go ahead and set it to nil now. - zipReq.cchan = nil + assert.Equal(t, zipReq, zipReq2) assert.False(t, zipReq == zipReq2) From 6e38463bead3d014fa8e4e7b0a653182c4c2921f Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 20 Apr 2021 14:29:39 +0800 Subject: [PATCH 06/25] Add database migrations --- models/migrations/migrations.go | 2 + models/migrations/v181.go | 1 + models/migrations/v185.go | 21 +++++++++ models/repo_archiver.go | 43 +++++++++++++++++- models/unit_tests.go | 2 + modules/setting/repository.go | 5 ++- routers/web/repo/repo.go | 23 ++++++---- services/archiver/archiver.go | 80 ++++++++++++++++----------------- 8 files changed, 124 insertions(+), 53 deletions(-) create mode 100644 models/migrations/v185.go diff --git a/models/migrations/migrations.go b/models/migrations/migrations.go index 880f55092d36..4e17a6a2c8a2 100644 --- a/models/migrations/migrations.go +++ b/models/migrations/migrations.go @@ -319,6 +319,8 @@ var migrations = []Migration{ NewMigration("Create PushMirror table", createPushMirrorTable), // v184 -> v185 NewMigration("Rename Task errors to message", renameTaskErrorsToMessage), + // v185 -> v186 + NewMigration("Add new table repo_archiver", addRepoArchiver), } // GetCurrentDBVersion returns the current db version diff --git a/models/migrations/v181.go b/models/migrations/v181.go index 6ba4edc15595..65045593ad6f 100644 --- a/models/migrations/v181.go +++ b/models/migrations/v181.go @@ -1,3 +1,4 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. diff --git a/models/migrations/v185.go b/models/migrations/v185.go new file mode 100644 index 000000000000..ba77704895d1 --- /dev/null +++ b/models/migrations/v185.go @@ -0,0 +1,21 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package migrations + +import ( + "xorm.io/xorm" +) + +func addRepoArchiver(x *xorm.Engine) error { + // RepoArchiver represents all archivers + type RepoArchiver struct { + ID int64 `xorm:"pk autoincr"` + RepoID int64 `xorm:"index unique(s)"` + Type int `xorm:"unique(s)"` + CommitID string `xorm:"VARCHAR(40) unique(s)"` + CreatedUnix int64 `xorm:"INDEX NOT NULL created"` + } + return x.Sync2(new(RepoArchiver)) +} diff --git a/models/repo_archiver.go b/models/repo_archiver.go index 73b8fbb80d53..b9c99e67e840 100644 --- a/models/repo_archiver.go +++ b/models/repo_archiver.go @@ -5,20 +5,59 @@ package models import ( + "fmt" + "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/timeutil" ) +type RepoArchiverStatus int + +const ( + RepoArchiverGenerating = iota // the archiver is generating + RepoArchiverReady // it's ready +) + // RepoArchiver represents all archivers type RepoArchiver struct { ID int64 `xorm:"pk autoincr"` RepoID int64 `xorm:"index unique(s)"` + Repo *Repository `xorm:"-"` Type git.ArchiveType `xorm:"unique(s)"` - CommitID string `xorm:"VARCHAR(40) unique(s)"` - Name string + Status RepoArchiverStatus + CommitID string `xorm:"VARCHAR(40) unique(s)"` CreatedUnix timeutil.TimeStamp `xorm:"INDEX NOT NULL created"` } +// LoadRepo loads repository +func (archiver *RepoArchiver) LoadRepo() (*Repository, error) { + if archiver.Repo != nil { + return archiver.Repo, nil + } + + var repo Repository + has, err := x.ID(archiver.RepoID).Get(&repo) + if err != nil { + return nil, err + } + if !has { + return nil, ErrRepoNotExist{ + ID: archiver.RepoID, + } + } + return &repo, nil +} + +// RelativePath returns relative path +func (archiver *RepoArchiver) RelativePath() (string, error) { + repo, err := archiver.LoadRepo() + if err != nil { + return "", err + } + + return fmt.Sprintf("%s/%s/%s.%s", repo.RepoPath(), archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()), nil +} + // GetRepoArchiver get an archiver func GetRepoArchiver(ctx DBContext, repoID int64, tp git.ArchiveType, commitID string) (*RepoArchiver, error) { var archiver RepoArchiver diff --git a/models/unit_tests.go b/models/unit_tests.go index 5a145fa2c023..08a6e6009104 100644 --- a/models/unit_tests.go +++ b/models/unit_tests.go @@ -74,6 +74,8 @@ func MainTest(m *testing.M, pathToGiteaRoot string) { setting.RepoAvatar.Storage.Path = filepath.Join(setting.AppDataPath, "repo-avatars") + setting.RepoArchive.Storage.Path = filepath.Join(setting.AppDataPath, "repo-archives") + if err = storage.Init(); err != nil { fatalTestError("storage.Init: %v\n", err) } diff --git a/modules/setting/repository.go b/modules/setting/repository.go index 6729df289605..0db6e507b013 100644 --- a/modules/setting/repository.go +++ b/modules/setting/repository.go @@ -333,6 +333,7 @@ func newRepository() { Repository.Upload.TempPath = path.Join(AppWorkPath, Repository.Upload.TempPath) } - repoAvatarSec := Cfg.Section("repository.archives") - RepoArchive.Storage = getStorage("repo-archive", "", repoAvatarSec) + repoAvatarSec := Cfg.Section("repo-archive") + storageType := repoAvatarSec.Key("STORAGE_TYPE").MustString("") + RepoArchive.Storage = getStorage("repo-archive", storageType, repoAvatarSec) } diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go index d16332b68a14..f8f84f84f583 100644 --- a/routers/web/repo/repo.go +++ b/routers/web/repo/repo.go @@ -367,7 +367,7 @@ func RedirectDownload(ctx *context.Context) { // Download an archive of a repository func Download(ctx *context.Context) { uri := ctx.Params("*") - aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.RepoID, ctx.Repo.GitRepo, uri) + aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri) if err != nil { ctx.ServerError("archiver_service.NewRequest", err) return @@ -377,16 +377,23 @@ func Download(ctx *context.Context) { return } - downloadName := ctx.Repo.Repository.Name + "-" + aReq.GetArchiveName() - - if err := archiver_service.ArchiveRepository(aReq); err != nil { + archiver, err := archiver_service.ArchiveRepository(aReq) + if err != nil { ctx.ServerError("ArchiveRepository", err) return } + rPath, err := archiver.RelativePath() + if err != nil { + ctx.ServerError("archiver.RelativePath", err) + return + } + + downloadName := ctx.Repo.Repository.Name + "-" + aReq.GetArchiveName() + if setting.RepoArchive.ServeDirect { //If we have a signed url (S3, object storage), redirect to this directly. - u, err := storage.RepoArchives.URL(aReq.GetArchivePath(), downloadName) + u, err := storage.RepoArchives.URL(rPath, downloadName) if u != nil && err == nil { ctx.Redirect(u.String()) return @@ -394,7 +401,7 @@ func Download(ctx *context.Context) { } //If we have matched and access to release or issue - fr, err := storage.RepoArchives.Open(aReq.GetArchivePath()) + fr, err := storage.RepoArchives.Open(rPath) if err != nil { ctx.ServerError("Open", err) return @@ -418,13 +425,13 @@ func InitiateDownload(ctx *context.Context) { return } - err = archiver_service.ArchiveRepository(aReq) + archiver, err := archiver_service.ArchiveRepository(aReq) if err != nil { ctx.ServerError("archiver_service.ArchiveRepository", err) return } ctx.JSON(http.StatusOK, map[string]interface{}{ - "complete": true, + "complete": archiver.Status == models.RepoArchiverReady, }) } diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go index 3ae1e2465b2d..b4f1e12e2cd3 100644 --- a/services/archiver/archiver.go +++ b/services/archiver/archiver.go @@ -8,12 +8,10 @@ package archiver import ( "fmt" "io" - "path" "regexp" "strings" "code.gitea.io/gitea/models" - "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/setting" @@ -31,9 +29,8 @@ type ArchiveRequest struct { repo *git.Repository refName string ext string - archivePath string archiveType git.ArchiveType - commit *git.Commit + commitID string } // SHA1 hashes will only go up to 40 characters, but SHA256 hashes will go all @@ -53,105 +50,106 @@ func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest switch { case strings.HasSuffix(uri, ".zip"): r.ext = ".zip" - r.archivePath = path.Join(r.repo.Path, "archives/zip") r.archiveType = git.ZIP case strings.HasSuffix(uri, ".tar.gz"): r.ext = ".tar.gz" - r.archivePath = path.Join(r.repo.Path, "archives/targz") r.archiveType = git.TARGZ default: return nil, fmt.Errorf("Unknown format: %s", uri) } r.refName = strings.TrimSuffix(r.uri, r.ext) - var err error + var err error // Get corresponding commit. if r.repo.IsBranchExist(r.refName) { - r.commit, err = r.repo.GetBranchCommit(r.refName) + r.commitID, err = r.repo.GetBranchCommitID(r.refName) if err != nil { return nil, err } } else if r.repo.IsTagExist(r.refName) { - r.commit, err = r.repo.GetTagCommit(r.refName) + r.commitID, err = r.repo.GetTagCommitID(r.refName) if err != nil { return nil, err } } else if shaRegex.MatchString(r.refName) { - r.commit, err = r.repo.GetCommit(r.refName) - if err != nil { - return nil, err - } + r.commitID = r.refName } else { return nil, fmt.Errorf("Unknow ref %s type", r.refName) } - r.archivePath = path.Join(r.archivePath, base.ShortSha(r.commit.ID.String())+r.ext) - if err != nil { - return nil, err - } return r, nil } -// GetArchivePath returns the path from which we can serve this archive. -func (aReq *ArchiveRequest) GetArchivePath() string { - return aReq.archivePath -} - // GetArchiveName returns the name of the caller, based on the ref used by the // caller to create this request. func (aReq *ArchiveRequest) GetArchiveName() string { return aReq.refName + aReq.ext } -func doArchive(r *ArchiveRequest) error { +func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { ctx, commiter, err := models.TxDBContext() if err != nil { - return err + return nil, err } defer commiter.Close() - archiver, err := models.GetRepoArchiver(ctx, r.repoID, r.archiveType, r.commit.ID.String()) + archiver, err := models.GetRepoArchiver(ctx, r.repoID, r.archiveType, r.commitID) if err != nil { - return err + return nil, err } if archiver != nil { - return nil + return archiver, nil } - if err := models.AddArchiver(ctx, &models.RepoArchiver{ + archiver = &models.RepoArchiver{ RepoID: r.repoID, Type: r.archiveType, - CommitID: r.commit.ID.String(), - Name: r.GetArchiveName(), - }); err != nil { - return err + CommitID: r.commitID, + } + if err := models.AddArchiver(ctx, archiver); err != nil { + return nil, err } - rd, w := io.Pipe() - var done chan error + rPath, err := archiver.RelativePath() + if err != nil { + return nil, err + } - go func(done chan error, w io.Writer) { + rd, w := io.Pipe() + defer func() { + w.Close() + rd.Close() + }() + var done = make(chan error) + + go func(done chan error, w *io.PipeWriter) { + defer func() { + if r := recover(); r != nil { + done <- fmt.Errorf("%v", r) + } + }() err := r.repo.CreateArchive( graceful.GetManager().ShutdownContext(), r.archiveType, w, setting.Repository.PrefixArchiveFiles, - r.commit.ID.String(), + r.commitID, ) + w.CloseWithError(err) done <- err }(done, w) - if _, err := storage.RepoArchives.Save(r.archivePath, rd, -1); err != nil { - return fmt.Errorf("Unable to write archive: %v", err) + if _, err := storage.RepoArchives.Save(rPath, rd, -1); err != nil { + return nil, fmt.Errorf("unable to write archive: %v", err) } err = <-done if err != nil { - return err + return nil, err } - return commiter.Commit() + return archiver, commiter.Commit() } // ArchiveRepository satisfies the ArchiveRequest being passed in. Processing @@ -160,6 +158,6 @@ func doArchive(r *ArchiveRequest) error { // anything. In all cases, the caller should be examining the *ArchiveRequest // being returned for completion, as it may be different than the one they passed // in. -func ArchiveRepository(request *ArchiveRequest) error { +func ArchiveRepository(request *ArchiveRequest) (*models.RepoArchiver, error) { return doArchive(request) } From 35817181062574ad6c1db70d6abcb62b56bacec7 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 20 Apr 2021 15:15:49 +0800 Subject: [PATCH 07/25] Add status for archiver --- models/migrations/v185.go | 7 ++--- models/repo_archiver.go | 12 ++++++--- routers/web/repo/repo.go | 7 ++++- services/archiver/archiver.go | 48 +++++++++++++++++++++++++++-------- 4 files changed, 57 insertions(+), 17 deletions(-) diff --git a/models/migrations/v185.go b/models/migrations/v185.go index ba77704895d1..096994889703 100644 --- a/models/migrations/v185.go +++ b/models/migrations/v185.go @@ -11,9 +11,10 @@ import ( func addRepoArchiver(x *xorm.Engine) error { // RepoArchiver represents all archivers type RepoArchiver struct { - ID int64 `xorm:"pk autoincr"` - RepoID int64 `xorm:"index unique(s)"` - Type int `xorm:"unique(s)"` + ID int64 `xorm:"pk autoincr"` + RepoID int64 `xorm:"index unique(s)"` + Type int `xorm:"unique(s)"` + Status int CommitID string `xorm:"VARCHAR(40) unique(s)"` CreatedUnix int64 `xorm:"INDEX NOT NULL created"` } diff --git a/models/repo_archiver.go b/models/repo_archiver.go index b9c99e67e840..9dfb410529b0 100644 --- a/models/repo_archiver.go +++ b/models/repo_archiver.go @@ -55,7 +55,7 @@ func (archiver *RepoArchiver) RelativePath() (string, error) { return "", err } - return fmt.Sprintf("%s/%s/%s.%s", repo.RepoPath(), archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()), nil + return fmt.Sprintf("%s/%s/%s.%s", repo.FullName(), archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()), nil } // GetRepoArchiver get an archiver @@ -71,8 +71,14 @@ func GetRepoArchiver(ctx DBContext, repoID int64, tp git.ArchiveType, commitID s return nil, nil } -// AddArchiver adds an archiver -func AddArchiver(ctx DBContext, archiver *RepoArchiver) error { +// AddRepoArchiver adds an archiver +func AddRepoArchiver(ctx DBContext, archiver *RepoArchiver) error { _, err := ctx.e.Insert(archiver) return err } + +// UpdateRepoArchiverStatus updates archiver's status +func UpdateRepoArchiverStatus(ctx DBContext, archiver *RepoArchiver) error { + _, err := ctx.e.ID(archiver.ID).Cols("status").Update(archiver) + return err +} diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go index f8f84f84f583..4565d7b909a4 100644 --- a/routers/web/repo/repo.go +++ b/routers/web/repo/repo.go @@ -431,7 +431,12 @@ func InitiateDownload(ctx *context.Context) { return } + var completed bool + if archiver != nil && archiver.Status == models.RepoArchiverReady { + completed = true + } + ctx.JSON(http.StatusOK, map[string]interface{}{ - "complete": archiver.Status == models.RepoArchiverReady, + "complete": completed, }) } diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go index b4f1e12e2cd3..a98e93230828 100644 --- a/services/archiver/archiver.go +++ b/services/archiver/archiver.go @@ -6,8 +6,10 @@ package archiver import ( + "errors" "fmt" "io" + "os" "regexp" "strings" @@ -98,17 +100,23 @@ func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { if err != nil { return nil, err } - if archiver != nil { - return archiver, nil - } - archiver = &models.RepoArchiver{ - RepoID: r.repoID, - Type: r.archiveType, - CommitID: r.commitID, - } - if err := models.AddArchiver(ctx, archiver); err != nil { - return nil, err + if archiver != nil { + // FIXME: If another process are generating it, we think it's not ready and just return + // Or we should wait until the archive generated. + if archiver.Status == models.RepoArchiverGenerating { + return nil, nil + } + } else { + archiver = &models.RepoArchiver{ + RepoID: r.repoID, + Type: r.archiveType, + CommitID: r.commitID, + Status: models.RepoArchiverGenerating, + } + if err := models.AddRepoArchiver(ctx, archiver); err != nil { + return nil, err + } } rPath, err := archiver.RelativePath() @@ -116,6 +124,19 @@ func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { return nil, err } + _, err = storage.RepoArchives.Stat(rPath) + if err == nil { + if archiver.Status == models.RepoArchiverGenerating { + archiver.Status = models.RepoArchiverReady + return archiver, models.UpdateRepoArchiverStatus(ctx, archiver) + } + return archiver, nil + } + + if !errors.Is(err, os.ErrNotExist) { + return nil, fmt.Errorf("unable to stat archive: %v", err) + } + rd, w := io.Pipe() defer func() { w.Close() @@ -149,6 +170,13 @@ func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { return nil, err } + if archiver.Status == models.RepoArchiverGenerating { + archiver.Status = models.RepoArchiverReady + if err = models.UpdateRepoArchiverStatus(ctx, archiver); err != nil { + return nil, err + } + } + return archiver, commiter.Commit() } From 064b7d453dcfd9f702805e0559817a8625cec845 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Tue, 20 Apr 2021 15:31:01 +0800 Subject: [PATCH 08/25] Fix lint --- models/repo_archiver.go | 2 ++ services/archiver/archiver.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/models/repo_archiver.go b/models/repo_archiver.go index 9dfb410529b0..833a22ee13c5 100644 --- a/models/repo_archiver.go +++ b/models/repo_archiver.go @@ -11,8 +11,10 @@ import ( "code.gitea.io/gitea/modules/timeutil" ) +// RepoArchiverStatus represents repo archive status type RepoArchiverStatus int +// enumerate all repo archive statuses const ( RepoArchiverGenerating = iota // the archiver is generating RepoArchiverReady // it's ready diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go index a98e93230828..5438a1213f84 100644 --- a/services/archiver/archiver.go +++ b/services/archiver/archiver.go @@ -157,7 +157,7 @@ func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { setting.Repository.PrefixArchiveFiles, r.commitID, ) - w.CloseWithError(err) + _ = w.CloseWithError(err) done <- err }(done, w) From 23c970274c48319378edb9d9ce9bdcc277ad5b3d Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Thu, 22 Apr 2021 09:20:22 +0800 Subject: [PATCH 09/25] Add queue --- services/archiver/archiver.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go index 5438a1213f84..1bceeb609a73 100644 --- a/services/archiver/archiver.go +++ b/services/archiver/archiver.go @@ -16,6 +16,8 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/queue" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" ) @@ -189,3 +191,29 @@ func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { func ArchiveRepository(request *ArchiveRequest) (*models.RepoArchiver, error) { return doArchive(request) } + +var archiverQueue queue.Queue + +// Init initlize archive +func Init() error { + handler := func(data ...queue.Data) { + for _, datum := range data { + archiveReq, ok := datum.(*ArchiveRequest) + if !ok { + log.Error("Unable to process provided datum: %v - not possible to cast to IndexerData", datum) + continue + } + log.Trace("ArchiverData Process: %#v", archiveReq) + if _, err := doArchive(archiveReq); err != nil { + log.Error("Archive %v faild: %v", datum, err) + } + } + } + + archiverQueue = queue.CreateQueue("repo-archive", handler, new(ArchiveRequest)) + if archiverQueue == nil { + return errors.New("unable to create codes indexer queue") + } + + return nil +} From b65f3bf73c5741702a521285382b20f41c14b3a0 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sat, 24 Apr 2021 11:49:50 +0800 Subject: [PATCH 10/25] Add doctor to check and delete old archives --- modules/doctor/checkOldArchives.go | 59 ++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 modules/doctor/checkOldArchives.go diff --git a/modules/doctor/checkOldArchives.go b/modules/doctor/checkOldArchives.go new file mode 100644 index 000000000000..a4e2ffbd1f0c --- /dev/null +++ b/modules/doctor/checkOldArchives.go @@ -0,0 +1,59 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package doctor + +import ( + "os" + "path/filepath" + + "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/util" +) + +func checkOldArchives(logger log.Logger, autofix bool) error { + numRepos := 0 + numReposUpdated := 0 + err := iterateRepositories(func(repo *models.Repository) error { + if repo.IsEmpty { + return nil + } + + p := filepath.Join(repo.RepoPath(), "archives") + isDir, err := util.IsDir(p) + if err != nil { + log.Warn("check if %s is directory failed: %v", p, err) + } + if isDir { + numRepos++ + if autofix { + if err := os.RemoveAll(p); err == nil { + numReposUpdated++ + } else { + log.Warn("remove %s failed: %v", p, err) + } + } + } + return nil + }) + + if autofix { + logger.Info("%d / %d old archives in repository deleted", numReposUpdated, numRepos) + } else { + logger.Info("%d old archives in repository need to be deleted", numRepos) + } + + return err +} + +func init() { + Register(&Check{ + Title: "Check old archives", + Name: "check-old-archives", + IsDefault: false, + Run: checkOldArchives, + Priority: 7, + }) +} From 732046da070e73713c11fc958a2ec7135fe3de33 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sat, 24 Apr 2021 17:22:11 +0800 Subject: [PATCH 11/25] Improve archive queue --- routers/init.go | 4 ++ routers/web/repo/repo.go | 58 ++++++++++++++++++++--- services/archiver/archiver.go | 86 ++++++++++++++++++++++------------- 3 files changed, 110 insertions(+), 38 deletions(-) diff --git a/routers/init.go b/routers/init.go index 4c28a953955b..bbf39a3f509e 100644 --- a/routers/init.go +++ b/routers/init.go @@ -33,6 +33,7 @@ import ( "code.gitea.io/gitea/routers/common" "code.gitea.io/gitea/routers/private" web_routers "code.gitea.io/gitea/routers/web" + "code.gitea.io/gitea/services/archiver" "code.gitea.io/gitea/services/auth" "code.gitea.io/gitea/services/mailer" mirror_service "code.gitea.io/gitea/services/mirror" @@ -63,6 +64,9 @@ func NewServices() { mailer.NewContext() _ = cache.NewContext() notification.NewContext() + if err := archiver.Init(); err != nil { + log.Fatal("archiver init failed: %v", err) + } } // GlobalInit is for global configuration reload-able. diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go index 4565d7b909a4..6e08ef867e93 100644 --- a/routers/web/repo/repo.go +++ b/routers/web/repo/repo.go @@ -10,10 +10,12 @@ import ( "fmt" "net/http" "strings" + "time" "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/context" + "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" @@ -377,11 +379,51 @@ func Download(ctx *context.Context) { return } - archiver, err := archiver_service.ArchiveRepository(aReq) + archiver, err := models.GetRepoArchiver(models.DefaultDBContext(), aReq.RepoID, aReq.Type, aReq.CommitID) if err != nil { - ctx.ServerError("ArchiveRepository", err) + ctx.ServerError("models.GetRepoArchiver", err) return } + if archiver != nil && archiver.Status == models.RepoArchiverReady { + download(ctx, aReq.GetArchiveName(), archiver) + return + } + + if err := archiver_service.StartArchive(aReq); err != nil { + ctx.ServerError("archiver_service.StartArchive", err) + return + } + + var times int + var t = time.NewTicker(time.Second * 1) + defer t.Stop() + + for { + select { + case <-graceful.GetManager().HammerContext().Done(): + log.Warn("exit archive downlaod because system stop") + return + case <-t.C: + if times > 20 { + ctx.ServerError("wait download timeout", nil) + return + } + times++ + archiver, err = models.GetRepoArchiver(models.DefaultDBContext(), aReq.RepoID, aReq.Type, aReq.CommitID) + if err != nil { + ctx.ServerError("archiver_service.StartArchive", err) + return + } + if archiver != nil && archiver.Status == models.RepoArchiverReady { + download(ctx, aReq.GetArchiveName(), archiver) + return + } + } + } +} + +func download(ctx *context.Context, archiveName string, archiver *models.RepoArchiver) { + downloadName := ctx.Repo.Repository.Name + "-" + archiveName rPath, err := archiver.RelativePath() if err != nil { @@ -389,8 +431,6 @@ func Download(ctx *context.Context) { return } - downloadName := ctx.Repo.Repository.Name + "-" + aReq.GetArchiveName() - if setting.RepoArchive.ServeDirect { //If we have a signed url (S3, object storage), redirect to this directly. u, err := storage.RepoArchives.URL(rPath, downloadName) @@ -425,11 +465,17 @@ func InitiateDownload(ctx *context.Context) { return } - archiver, err := archiver_service.ArchiveRepository(aReq) + archiver, err := models.GetRepoArchiver(models.DefaultDBContext(), aReq.RepoID, aReq.Type, aReq.CommitID) if err != nil { - ctx.ServerError("archiver_service.ArchiveRepository", err) + ctx.ServerError("archiver_service.StartArchive", err) return } + if archiver == nil || archiver.Status != models.RepoArchiverReady { + if err := archiver_service.StartArchive(aReq); err != nil { + ctx.ServerError("archiver_service.StartArchive", err) + return + } + } var completed bool if archiver != nil && archiver.Status == models.RepoArchiverReady { diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go index 1bceeb609a73..b0a112077f44 100644 --- a/services/archiver/archiver.go +++ b/services/archiver/archiver.go @@ -28,13 +28,10 @@ import ( // This is entirely opaque to external entities, though, and mostly used as a // handle elsewhere. type ArchiveRequest struct { - uri string - repoID int64 - repo *git.Repository - refName string - ext string - archiveType git.ArchiveType - commitID string + RepoID int64 + refName string + Type git.ArchiveType + CommitID string } // SHA1 hashes will only go up to 40 characters, but SHA256 hashes will go all @@ -46,38 +43,37 @@ var shaRegex = regexp.MustCompile(`^[0-9a-f]{4,64}$`) // if it's determined that the request still needs to be satisfied. func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest, error) { r := &ArchiveRequest{ - repoID: repoID, - uri: uri, - repo: repo, + RepoID: repoID, } + var ext string switch { case strings.HasSuffix(uri, ".zip"): - r.ext = ".zip" - r.archiveType = git.ZIP + ext = ".zip" + r.Type = git.ZIP case strings.HasSuffix(uri, ".tar.gz"): - r.ext = ".tar.gz" - r.archiveType = git.TARGZ + ext = ".tar.gz" + r.Type = git.TARGZ default: return nil, fmt.Errorf("Unknown format: %s", uri) } - r.refName = strings.TrimSuffix(r.uri, r.ext) + r.refName = strings.TrimSuffix(uri, ext) var err error // Get corresponding commit. - if r.repo.IsBranchExist(r.refName) { - r.commitID, err = r.repo.GetBranchCommitID(r.refName) + if repo.IsBranchExist(r.refName) { + r.CommitID, err = repo.GetBranchCommitID(r.refName) if err != nil { return nil, err } - } else if r.repo.IsTagExist(r.refName) { - r.commitID, err = r.repo.GetTagCommitID(r.refName) + } else if repo.IsTagExist(r.refName) { + r.CommitID, err = repo.GetTagCommitID(r.refName) if err != nil { return nil, err } } else if shaRegex.MatchString(r.refName) { - r.commitID = r.refName + r.CommitID = r.refName } else { return nil, fmt.Errorf("Unknow ref %s type", r.refName) } @@ -88,7 +84,7 @@ func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest // GetArchiveName returns the name of the caller, based on the ref used by the // caller to create this request. func (aReq *ArchiveRequest) GetArchiveName() string { - return aReq.refName + aReq.ext + return aReq.refName + "." + aReq.Type.String() } func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { @@ -98,7 +94,7 @@ func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { } defer commiter.Close() - archiver, err := models.GetRepoArchiver(ctx, r.repoID, r.archiveType, r.commitID) + archiver, err := models.GetRepoArchiver(ctx, r.RepoID, r.Type, r.CommitID) if err != nil { return nil, err } @@ -111,9 +107,9 @@ func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { } } else { archiver = &models.RepoArchiver{ - RepoID: r.repoID, - Type: r.archiveType, - CommitID: r.commitID, + RepoID: r.RepoID, + Type: r.Type, + CommitID: r.CommitID, Status: models.RepoArchiverGenerating, } if err := models.AddRepoArchiver(ctx, archiver); err != nil { @@ -146,22 +142,34 @@ func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { }() var done = make(chan error) - go func(done chan error, w *io.PipeWriter) { + go func(done chan error, w *io.PipeWriter, archiver *models.RepoArchiver) { defer func() { if r := recover(); r != nil { done <- fmt.Errorf("%v", r) } }() - err := r.repo.CreateArchive( + repo, err := archiver.LoadRepo() + if err != nil { + done <- err + return + } + + gitRepo, err := git.OpenRepository(repo.RepoPath()) + if err != nil { + done <- err + return + } + + err = gitRepo.CreateArchive( graceful.GetManager().ShutdownContext(), - r.archiveType, + archiver.Type, w, setting.Repository.PrefixArchiveFiles, - r.commitID, + archiver.CommitID, ) _ = w.CloseWithError(err) done <- err - }(done, w) + }(done, w, archiver) if _, err := storage.RepoArchives.Save(rPath, rd, -1); err != nil { return nil, fmt.Errorf("unable to write archive: %v", err) @@ -192,7 +200,7 @@ func ArchiveRepository(request *ArchiveRequest) (*models.RepoArchiver, error) { return doArchive(request) } -var archiverQueue queue.Queue +var archiverQueue queue.UniqueQueue // Init initlize archive func Init() error { @@ -210,10 +218,24 @@ func Init() error { } } - archiverQueue = queue.CreateQueue("repo-archive", handler, new(ArchiveRequest)) + archiverQueue = queue.CreateUniqueQueue("repo-archive", handler, new(ArchiveRequest)) if archiverQueue == nil { return errors.New("unable to create codes indexer queue") } + go graceful.GetManager().RunWithShutdownFns(archiverQueue.Run) + return nil } + +// StartArchive push the archive request to the queue +func StartArchive(request *ArchiveRequest) error { + has, err := archiverQueue.Has(request) + if err != nil { + return err + } + if has { + return nil + } + return archiverQueue.Push(request) +} From fd779aaed4e1e070ad6af9002704ac9f368d95e4 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sat, 24 Apr 2021 18:06:21 +0800 Subject: [PATCH 12/25] Fix tests --- .../user27/repo49.git/refs/heads/test/archive | 1 + services/archiver/archiver.go | 10 +++++-- services/archiver/archiver_test.go | 29 ++++++++++++------- 3 files changed, 27 insertions(+), 13 deletions(-) create mode 100644 integrations/gitea-repositories-meta/user27/repo49.git/refs/heads/test/archive diff --git a/integrations/gitea-repositories-meta/user27/repo49.git/refs/heads/test/archive b/integrations/gitea-repositories-meta/user27/repo49.git/refs/heads/test/archive new file mode 100644 index 000000000000..0f13243bfd64 --- /dev/null +++ b/integrations/gitea-repositories-meta/user27/repo49.git/refs/heads/test/archive @@ -0,0 +1 @@ +aacbdfe9e1c4b47f60abe81849045fa4e96f1d75 diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go index b0a112077f44..88032df31892 100644 --- a/services/archiver/archiver.go +++ b/services/archiver/archiver.go @@ -73,7 +73,13 @@ func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest return nil, err } } else if shaRegex.MatchString(r.refName) { - r.CommitID = r.refName + if repo.IsCommitExist(r.refName) { + r.CommitID = r.refName + } else { + return nil, git.ErrNotExist{ + ID: r.refName, + } + } } else { return nil, fmt.Errorf("Unknow ref %s type", r.refName) } @@ -84,7 +90,7 @@ func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest // GetArchiveName returns the name of the caller, based on the ref used by the // caller to create this request. func (aReq *ArchiveRequest) GetArchiveName() string { - return aReq.refName + "." + aReq.Type.String() + return strings.Replace(aReq.refName, "/", "-", -1) + "." + aReq.Type.String() } func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { diff --git a/services/archiver/archiver_test.go b/services/archiver/archiver_test.go index f075cd379232..3f3f369987b5 100644 --- a/services/archiver/archiver_test.go +++ b/services/archiver/archiver_test.go @@ -29,34 +29,41 @@ func TestArchive_Basic(t *testing.T) { ctx := test.MockContext(t, "user27/repo49") firstCommit, secondCommit := "51f84af23134", "aacbdfe9e1c4" - bogusReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") - assert.NoError(t, err) - assert.Nil(t, bogusReq) - test.LoadRepo(t, ctx, 49) - bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") - assert.NoError(t, err) - assert.Nil(t, bogusReq) - test.LoadGitRepo(t, ctx) defer ctx.Repo.GitRepo.Close() + bogusReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") + assert.NoError(t, err) + assert.NotNil(t, bogusReq) + assert.EqualValues(t, firstCommit+".zip", bogusReq.GetArchiveName()) + // Check a series of bogus requests. // Step 1, valid commit with a bad extension. bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".dilbert") - assert.NoError(t, err) + assert.Error(t, err) assert.Nil(t, bogusReq) // Step 2, missing commit. bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "dbffff.zip") - assert.NoError(t, err) + assert.Error(t, err) assert.Nil(t, bogusReq) // Step 3, doesn't look like branch/tag/commit. bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "db.zip") - assert.NoError(t, err) + assert.Error(t, err) assert.Nil(t, bogusReq) + bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "master.zip") + assert.NoError(t, err) + assert.NotNil(t, bogusReq) + assert.EqualValues(t, "master.zip", bogusReq.GetArchiveName()) + + bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "test/archive.zip") + assert.NoError(t, err) + assert.NotNil(t, bogusReq) + assert.EqualValues(t, "test-archive.zip", bogusReq.GetArchiveName()) + // Now two valid requests, firstCommit with valid extensions. zipReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip") assert.NoError(t, err) From 86eed1118c168bf0c55c1d808db92f4e021b6383 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sat, 24 Apr 2021 20:05:20 +0800 Subject: [PATCH 13/25] improve archive storage --- routers/web/repo/repo.go | 2 +- services/archiver/archiver.go | 30 ++++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go index 6e08ef867e93..919fd4620d25 100644 --- a/routers/web/repo/repo.go +++ b/routers/web/repo/repo.go @@ -401,7 +401,7 @@ func Download(ctx *context.Context) { for { select { case <-graceful.GetManager().HammerContext().Done(): - log.Warn("exit archive downlaod because system stop") + log.Warn("exit archive download because system stop") return case <-t.C: if times > 20 { diff --git a/services/archiver/archiver.go b/services/archiver/archiver.go index 88032df31892..00c028130681 100644 --- a/services/archiver/archiver.go +++ b/services/archiver/archiver.go @@ -90,7 +90,7 @@ func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest // GetArchiveName returns the name of the caller, based on the ref used by the // caller to create this request. func (aReq *ArchiveRequest) GetArchiveName() string { - return strings.Replace(aReq.refName, "/", "-", -1) + "." + aReq.Type.String() + return strings.ReplaceAll(aReq.refName, "/", "-") + "." + aReq.Type.String() } func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { @@ -147,24 +147,23 @@ func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { rd.Close() }() var done = make(chan error) + repo, err := archiver.LoadRepo() + if err != nil { + return nil, fmt.Errorf("archiver.LoadRepo failed: %v", err) + } + + gitRepo, err := git.OpenRepository(repo.RepoPath()) + if err != nil { + return nil, err + } + defer gitRepo.Close() - go func(done chan error, w *io.PipeWriter, archiver *models.RepoArchiver) { + go func(done chan error, w *io.PipeWriter, archiver *models.RepoArchiver, gitRepo *git.Repository) { defer func() { if r := recover(); r != nil { done <- fmt.Errorf("%v", r) } }() - repo, err := archiver.LoadRepo() - if err != nil { - done <- err - return - } - - gitRepo, err := git.OpenRepository(repo.RepoPath()) - if err != nil { - done <- err - return - } err = gitRepo.CreateArchive( graceful.GetManager().ShutdownContext(), @@ -175,7 +174,10 @@ func doArchive(r *ArchiveRequest) (*models.RepoArchiver, error) { ) _ = w.CloseWithError(err) done <- err - }(done, w, archiver) + }(done, w, archiver, gitRepo) + + // TODO: add lfs data to zip + // TODO: add submodule data to zip if _, err := storage.RepoArchives.Save(rPath, rd, -1); err != nil { return nil, fmt.Errorf("unable to write archive: %v", err) From dbd829e7eb7f2fca546827483d0a95e8aeae2128 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sat, 24 Apr 2021 21:08:21 +0800 Subject: [PATCH 14/25] Delete repo archives --- models/repo.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/models/repo.go b/models/repo.go index dc4e03a28a11..4c5bdc4196e8 100644 --- a/models/repo.go +++ b/models/repo.go @@ -1587,6 +1587,22 @@ func DeleteRepository(doer *User, uid, repoID int64) error { return err } + // Remove archives + var archives []*RepoArchiver + if err = sess.Where("repo_id=?", repoID).Find(&archives); err != nil { + return err + } + + for _, v := range archives { + v.Repo = repo + p, _ := v.RelativePath() + removeStorageWithNotice(sess, storage.RepoArchives, "Delete repo archive file", p) + } + + if _, err := sess.Delete(&LFSMetaObject{RepositoryID: repoID}); err != nil { + return err + } + if repo.NumForks > 0 { if _, err = sess.Exec("UPDATE `repository` SET fork_id=0,is_fork=? WHERE fork_id=?", false, repo.ID); err != nil { log.Error("reset 'fork_id' and 'is_fork': %v", err) From 3b8994037b4bdcfba3b42d6b9350744321c08378 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sat, 24 Apr 2021 21:27:04 +0800 Subject: [PATCH 15/25] Add missing fixture --- models/fixtures/repo_archiver.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 models/fixtures/repo_archiver.yml diff --git a/models/fixtures/repo_archiver.yml b/models/fixtures/repo_archiver.yml new file mode 100644 index 000000000000..e69de29bb2d1 From 2d2652047c9ed573558cd227f0268781bd74c0f0 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 25 Apr 2021 14:09:09 +0800 Subject: [PATCH 16/25] fix fixture --- models/fixtures/repo_archiver.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/models/fixtures/repo_archiver.yml b/models/fixtures/repo_archiver.yml index e69de29bb2d1..3fc9e456d472 100644 --- a/models/fixtures/repo_archiver.yml +++ b/models/fixtures/repo_archiver.yml @@ -0,0 +1 @@ +[] # empty \ No newline at end of file From 6bcc9dd50f28f7502db057cec9502292643c3d92 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 28 Apr 2021 00:30:02 +0800 Subject: [PATCH 17/25] Fix fixture --- models/fixtures/repo_archiver.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/fixtures/repo_archiver.yml b/models/fixtures/repo_archiver.yml index 3fc9e456d472..ca780a73aa0c 100644 --- a/models/fixtures/repo_archiver.yml +++ b/models/fixtures/repo_archiver.yml @@ -1 +1 @@ -[] # empty \ No newline at end of file +[] # empty From 149fe6d66a584cc37868560c3ad86e4828250a45 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 28 Apr 2021 10:11:43 +0800 Subject: [PATCH 18/25] Fix test --- models/models.go | 1 + 1 file changed, 1 insertion(+) diff --git a/models/models.go b/models/models.go index c325fd381159..3266be0f4ab8 100644 --- a/models/models.go +++ b/models/models.go @@ -136,6 +136,7 @@ func init() { new(RepoTransfer), new(IssueIndex), new(PushMirror), + new(RepoArchiver), ) gonicNames := []string{"SSL", "UID"} From 950ef313c15656bc3de7fca3a7724e9183327f24 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 28 Apr 2021 23:46:25 +0800 Subject: [PATCH 19/25] Fix archiver cleaning --- models/repo.go | 81 +++++++++++++++++++------------------------------- 1 file changed, 31 insertions(+), 50 deletions(-) diff --git a/models/repo.go b/models/repo.go index 4c5bdc4196e8..3cc30bd8973e 100644 --- a/models/repo.go +++ b/models/repo.go @@ -1784,64 +1784,45 @@ func DeleteRepositoryArchives(ctx context.Context) error { func DeleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration) error { log.Trace("Doing: ArchiveCleanup") - if err := x.Where("id > 0").Iterate(new(Repository), func(idx int, bean interface{}) error { - return deleteOldRepositoryArchives(ctx, olderThan, idx, bean) - }); err != nil { - log.Trace("Error: ArchiveClean: %v", err) - return err - } - - log.Trace("Finished: ArchiveCleanup") - return nil -} - -func deleteOldRepositoryArchives(ctx context.Context, olderThan time.Duration, idx int, bean interface{}) error { - repo := bean.(*Repository) - basePath := filepath.Join(repo.RepoPath(), "archives") - - for _, ty := range []string{"zip", "targz"} { - select { - case <-ctx.Done(): - return ErrCancelledf("before deleting old repository archives with filetype %s for %s", ty, repo.FullName()) - default: - } - - path := filepath.Join(basePath, ty) - file, err := os.Open(path) - if err != nil { - if !os.IsNotExist(err) { - log.Warn("Unable to open directory %s: %v", path, err) - return err - } - - // If the directory doesn't exist, that's okay. - continue - } - - files, err := file.Readdir(0) - file.Close() + for { + var archivers []RepoArchiver + err := x.Where("created_unix < ?", time.Now().Add(-olderThan).Unix()). + Asc("created_unix"). + Limit(100). + Find(&archivers) if err != nil { - log.Warn("Unable to read directory %s: %v", path, err) + log.Trace("Error: ArchiveClean: %v", err) return err } - minimumOldestTime := time.Now().Add(-olderThan) - for _, info := range files { - if info.ModTime().Before(minimumOldestTime) && !info.IsDir() { - select { - case <-ctx.Done(): - return ErrCancelledf("before deleting old repository archive file %s with filetype %s for %s", info.Name(), ty, repo.FullName()) - default: - } - toDelete := filepath.Join(path, info.Name()) - // This is a best-effort purge, so we do not check error codes to confirm removal. - if err = util.Remove(toDelete); err != nil { - log.Trace("Unable to delete %s, but proceeding: %v", toDelete, err) - } + for _, archiver := range archivers { + if err := deleteOldRepoArchiver(ctx, &archiver); err != nil { + return err } } + if len(archivers) < 100 { + break + } } + log.Trace("Finished: ArchiveCleanup") + return nil +} + +var delRepoArchiver = new(RepoArchiver) + +func deleteOldRepoArchiver(ctx context.Context, archiver *RepoArchiver) error { + p, err := archiver.RelativePath() + if err != nil { + return err + } + _, err = x.ID(archiver.ID).Delete(delRepoArchiver) + if err != nil { + return err + } + if err := storage.RepoArchives.Delete(p); err != nil { + log.Error("delete repo archive file failed: %v", err) + } return nil } From 45ad7ab437c0562b4859907d0dbfa8d425cfa351 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Mon, 31 May 2021 16:03:09 +0800 Subject: [PATCH 20/25] Fix bug --- models/repo.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/repo.go b/models/repo.go index 3cc30bd8973e..2baf6e9bdd94 100644 --- a/models/repo.go +++ b/models/repo.go @@ -1599,7 +1599,7 @@ func DeleteRepository(doer *User, uid, repoID int64) error { removeStorageWithNotice(sess, storage.RepoArchives, "Delete repo archive file", p) } - if _, err := sess.Delete(&LFSMetaObject{RepositoryID: repoID}); err != nil { + if _, err := sess.Delete(&RepoArchiver{RepoID: repoID}); err != nil { return err } From aaa55d135f2347967b42f226706894c7b7cbf13f Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 2 Jun 2021 08:58:51 +0800 Subject: [PATCH 21/25] Add docs for repository archive storage --- custom/conf/app.example.ini | 10 ++++++++++ .../doc/advanced/config-cheat-sheet.en-us.md | 17 +++++++++++++++++ .../doc/advanced/config-cheat-sheet.zh-cn.md | 15 +++++++++++++++ modules/setting/repository.go | 6 +++--- 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 38759b8a4b8f..341a61b1a71c 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2039,6 +2039,16 @@ PATH = ;; storage type ;STORAGE_TYPE = local +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; settings for repository archives, will override storage setting +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;[repo-archive] +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; storage type +;STORAGE_TYPE = local + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; lfs storage will override storage diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 35deeac02e22..ba6058c34445 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -991,6 +991,23 @@ MINIO_USE_SSL = false And used by `[attachment]`, `[lfs]` and etc. as `STORAGE_TYPE`. +## Repository Archive (`repo-archive`) + +Configuration for repository archive included storage. It will be derived from default `[storage]` or +`[storage.xxx]` when set `STORAGE_TYPE` to `xxx`. When derived, the default of `PATH` +is `data/repo-archive` and the default of `MINIO_BASE_PATH` is `repo-archive/`. + +- `STORAGE_TYPE`: **local**: Storage type for repo archive, `local` for local disk or `minio` for s3 compatible object storage service or other name defined with `[storage.xxx]` +- `SERVE_DIRECT`: **false**: Allows the storage driver to redirect to authenticated URLs to serve files directly. Currently, only Minio/S3 is supported via signed URLs, local does nothing. +- `PATH`: **./data/repo-archive**: Where to store LFS files, only available when `STORAGE_TYPE` is `local`. If not set it fall back to deprecated LFS_CONTENT_PATH value in [server] section. +- `MINIO_ENDPOINT`: **localhost:9000**: Minio endpoint to connect only available when `STORAGE_TYPE` is `minio` +- `MINIO_ACCESS_KEY_ID`: Minio accessKeyID to connect only available when `STORAGE_TYPE` is `minio` +- `MINIO_SECRET_ACCESS_KEY`: Minio secretAccessKey to connect only available when `STORAGE_TYPE is` `minio` +- `MINIO_BUCKET`: **gitea**: Minio bucket to store the lfs only available when `STORAGE_TYPE` is `minio` +- `MINIO_LOCATION`: **us-east-1**: Minio location to create bucket only available when `STORAGE_TYPE` is `minio` +- `MINIO_BASE_PATH`: **repo-archive/**: Minio base path on the bucket only available when `STORAGE_TYPE` is `minio` +- `MINIO_USE_SSL`: **false**: Minio enabled ssl only available when `STORAGE_TYPE` is `minio` + ## Other (`other`) - `SHOW_FOOTER_BRANDING`: **false**: Show Gitea branding in the footer. diff --git a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md index 79cfd94cc714..dd5fad48a3b6 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md +++ b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md @@ -382,6 +382,21 @@ MINIO_USE_SSL = false 然后你在 `[attachment]`, `[lfs]` 等中可以把这个名字用作 `STORAGE_TYPE` 的值。 +## Repository Archive (`repo-archive`) + +Repository archive 的存储配置。 如果 `STORAGE_TYPE` 为空,则此配置将从 `[storage]` 继承。如果不为 `local` 或者 `minio` 而为 `xxx`, 则从 `[storage.xxx]` 继承。当继承时, `PATH` 默认为 `data/repo-archive`,`MINIO_BASE_PATH` 默认为 `repo-archive/`。 + +- `STORAGE_TYPE`: **local**: Repository archive 的存储类型,`local` 将存储到磁盘,`minio` 将存储到 s3 兼容的对象服务。 +- `SERVE_DIRECT`: **false**: 允许直接重定向到存储系统。当前,仅 Minio/S3 是支持的。 +- `PATH`: 存放 Repository archive 命令上传的文件的地方,默认是 `data/repo-archive`。 +- `MINIO_ENDPOINT`: **localhost:9000**: Minio 地址,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_ACCESS_KEY_ID`: Minio accessKeyID,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_SECRET_ACCESS_KEY`: Minio secretAccessKey,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_BUCKET`: **gitea**: Minio bucket,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_LOCATION`: **us-east-1**: Minio location ,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_BASE_PATH`: **repo-archive/**: Minio base path ,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_USE_SSL`: **false**: Minio 是否启用 ssl ,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 + ## Other (`other`) - `SHOW_FOOTER_BRANDING`: 为真则在页面底部显示Gitea的字样。 diff --git a/modules/setting/repository.go b/modules/setting/repository.go index 0db6e507b013..2b14f7326028 100644 --- a/modules/setting/repository.go +++ b/modules/setting/repository.go @@ -333,7 +333,7 @@ func newRepository() { Repository.Upload.TempPath = path.Join(AppWorkPath, Repository.Upload.TempPath) } - repoAvatarSec := Cfg.Section("repo-archive") - storageType := repoAvatarSec.Key("STORAGE_TYPE").MustString("") - RepoArchive.Storage = getStorage("repo-archive", storageType, repoAvatarSec) + repoArchiveSec := Cfg.Section("repo-archive") + storageType := repoArchiveSec.Key("STORAGE_TYPE").MustString("") + RepoArchive.Storage = getStorage("repo-archive", storageType, repoArchiveSec) } From fe149b1d315b72576d7b3afe8f5d0bd6c3bf43e6 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Fri, 4 Jun 2021 20:36:08 +0800 Subject: [PATCH 22/25] remove repo-archive configuration --- custom/conf/app.example.ini | 2 +- .../doc/advanced/config-cheat-sheet.en-us.md | 8 ++++---- .../doc/advanced/config-cheat-sheet.zh-cn.md | 18 +++++++++--------- models/unit_tests.go | 2 +- modules/setting/repository.go | 4 +--- modules/setting/storage.go | 8 +++++--- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 341a61b1a71c..69eb81036c78 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2043,7 +2043,7 @@ PATH = ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; settings for repository archives, will override storage setting ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;[repo-archive] +;[storage.repo-archive] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; storage type diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index ba6058c34445..20edf466cb81 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -991,15 +991,15 @@ MINIO_USE_SSL = false And used by `[attachment]`, `[lfs]` and etc. as `STORAGE_TYPE`. -## Repository Archive (`repo-archive`) +## Repository Archive Storage (`storage.repo-archive`) -Configuration for repository archive included storage. It will be derived from default `[storage]` or -`[storage.xxx]` when set `STORAGE_TYPE` to `xxx`. When derived, the default of `PATH` +Configuration for repository archive storage. It will inherit from default `[storage]` or +`[storage.xxx]` when set `STORAGE_TYPE` to `xxx`. The default of `PATH` is `data/repo-archive` and the default of `MINIO_BASE_PATH` is `repo-archive/`. - `STORAGE_TYPE`: **local**: Storage type for repo archive, `local` for local disk or `minio` for s3 compatible object storage service or other name defined with `[storage.xxx]` - `SERVE_DIRECT`: **false**: Allows the storage driver to redirect to authenticated URLs to serve files directly. Currently, only Minio/S3 is supported via signed URLs, local does nothing. -- `PATH`: **./data/repo-archive**: Where to store LFS files, only available when `STORAGE_TYPE` is `local`. If not set it fall back to deprecated LFS_CONTENT_PATH value in [server] section. +- `PATH`: **./data/repo-archive**: Where to store archive files, only available when `STORAGE_TYPE` is `local`. - `MINIO_ENDPOINT`: **localhost:9000**: Minio endpoint to connect only available when `STORAGE_TYPE` is `minio` - `MINIO_ACCESS_KEY_ID`: Minio accessKeyID to connect only available when `STORAGE_TYPE` is `minio` - `MINIO_SECRET_ACCESS_KEY`: Minio secretAccessKey to connect only available when `STORAGE_TYPE is` `minio` diff --git a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md index dd5fad48a3b6..2303a631d56c 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md +++ b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md @@ -382,20 +382,20 @@ MINIO_USE_SSL = false 然后你在 `[attachment]`, `[lfs]` 等中可以把这个名字用作 `STORAGE_TYPE` 的值。 -## Repository Archive (`repo-archive`) +## Repository Archive Storage (`storage.repo-archive`) Repository archive 的存储配置。 如果 `STORAGE_TYPE` 为空,则此配置将从 `[storage]` 继承。如果不为 `local` 或者 `minio` 而为 `xxx`, 则从 `[storage.xxx]` 继承。当继承时, `PATH` 默认为 `data/repo-archive`,`MINIO_BASE_PATH` 默认为 `repo-archive/`。 - `STORAGE_TYPE`: **local**: Repository archive 的存储类型,`local` 将存储到磁盘,`minio` 将存储到 s3 兼容的对象服务。 - `SERVE_DIRECT`: **false**: 允许直接重定向到存储系统。当前,仅 Minio/S3 是支持的。 -- `PATH`: 存放 Repository archive 命令上传的文件的地方,默认是 `data/repo-archive`。 -- `MINIO_ENDPOINT`: **localhost:9000**: Minio 地址,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 -- `MINIO_ACCESS_KEY_ID`: Minio accessKeyID,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 -- `MINIO_SECRET_ACCESS_KEY`: Minio secretAccessKey,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 -- `MINIO_BUCKET`: **gitea**: Minio bucket,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 -- `MINIO_LOCATION`: **us-east-1**: Minio location ,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 -- `MINIO_BASE_PATH`: **repo-archive/**: Minio base path ,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 -- `MINIO_USE_SSL`: **false**: Minio 是否启用 ssl ,仅当 `LFS_STORAGE_TYPE` 为 `minio` 时有效。 +- `PATH`: 存放 Repository archive 上传的文件的地方,默认是 `data/repo-archive`。 +- `MINIO_ENDPOINT`: **localhost:9000**: Minio 地址,仅当 `STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_ACCESS_KEY_ID`: Minio accessKeyID,仅当 `STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_SECRET_ACCESS_KEY`: Minio secretAccessKey,仅当 `STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_BUCKET`: **gitea**: Minio bucket,仅当 `STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_LOCATION`: **us-east-1**: Minio location ,仅当 `STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_BASE_PATH`: **repo-archive/**: Minio base path ,仅当 `STORAGE_TYPE` 为 `minio` 时有效。 +- `MINIO_USE_SSL`: **false**: Minio 是否启用 ssl ,仅当 `STORAGE_TYPE` 为 `minio` 时有效。 ## Other (`other`) diff --git a/models/unit_tests.go b/models/unit_tests.go index 08a6e6009104..f8d681933361 100644 --- a/models/unit_tests.go +++ b/models/unit_tests.go @@ -74,7 +74,7 @@ func MainTest(m *testing.M, pathToGiteaRoot string) { setting.RepoAvatar.Storage.Path = filepath.Join(setting.AppDataPath, "repo-avatars") - setting.RepoArchive.Storage.Path = filepath.Join(setting.AppDataPath, "repo-archives") + setting.RepoArchive.Storage.Path = filepath.Join(setting.AppDataPath, "repo-archive") if err = storage.Init(); err != nil { fatalTestError("storage.Init: %v\n", err) diff --git a/modules/setting/repository.go b/modules/setting/repository.go index 2b14f7326028..c2a6357d9462 100644 --- a/modules/setting/repository.go +++ b/modules/setting/repository.go @@ -333,7 +333,5 @@ func newRepository() { Repository.Upload.TempPath = path.Join(AppWorkPath, Repository.Upload.TempPath) } - repoArchiveSec := Cfg.Section("repo-archive") - storageType := repoArchiveSec.Key("STORAGE_TYPE").MustString("") - RepoArchive.Storage = getStorage("repo-archive", storageType, repoArchiveSec) + RepoArchive.Storage = getStorage("repo-archive", "", nil) } diff --git a/modules/setting/storage.go b/modules/setting/storage.go index 3ab08d8d2a02..64368ae29d9c 100644 --- a/modules/setting/storage.go +++ b/modules/setting/storage.go @@ -64,9 +64,11 @@ func getStorage(name, typ string, targetSec *ini.Section) Storage { overrides = append(overrides, sec) for _, override := range overrides { - for _, key := range override.Keys() { - if !targetSec.HasKey(key.Name()) { - _, _ = targetSec.NewKey(key.Name(), key.Value()) + if targetSec != nil { + for _, key := range override.Keys() { + if !targetSec.HasKey(key.Name()) { + _, _ = targetSec.NewKey(key.Name(), key.Value()) + } } } if len(storage.Type) == 0 { From 739ac2569072700475322ff29302085a15d583a6 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Fri, 4 Jun 2021 21:41:37 +0800 Subject: [PATCH 23/25] Fix test --- modules/setting/storage.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/setting/storage.go b/modules/setting/storage.go index 64368ae29d9c..b344a04c02e5 100644 --- a/modules/setting/storage.go +++ b/modules/setting/storage.go @@ -43,6 +43,10 @@ func getStorage(name, typ string, targetSec *ini.Section) Storage { sec.Key("MINIO_LOCATION").MustString("us-east-1") sec.Key("MINIO_USE_SSL").MustBool(false) + if targetSec == nil { + targetSec = &ini.Section{} + } + var storage Storage storage.Section = targetSec storage.Type = typ @@ -64,11 +68,9 @@ func getStorage(name, typ string, targetSec *ini.Section) Storage { overrides = append(overrides, sec) for _, override := range overrides { - if targetSec != nil { - for _, key := range override.Keys() { - if !targetSec.HasKey(key.Name()) { - _, _ = targetSec.NewKey(key.Name(), key.Value()) - } + for _, key := range override.Keys() { + if !targetSec.HasKey(key.Name()) { + _, _ = targetSec.NewKey(key.Name(), key.Value()) } } if len(storage.Type) == 0 { From aa19e02539bbfb4ec28d74fd4440ca62e230b744 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Fri, 4 Jun 2021 23:14:49 +0800 Subject: [PATCH 24/25] Fix test --- modules/setting/storage.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/setting/storage.go b/modules/setting/storage.go index b344a04c02e5..075152db59cd 100644 --- a/modules/setting/storage.go +++ b/modules/setting/storage.go @@ -44,7 +44,7 @@ func getStorage(name, typ string, targetSec *ini.Section) Storage { sec.Key("MINIO_USE_SSL").MustBool(false) if targetSec == nil { - targetSec = &ini.Section{} + targetSec, _ = Cfg.NewSection(name) } var storage Storage From 486ecc8f188bcb2821c236b851cdc538db0ef4cf Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Mon, 14 Jun 2021 16:50:48 +0800 Subject: [PATCH 25/25] Fix lint --- routers/api/v1/repo/file.go | 3 ++- routers/common/repo.go | 26 -------------------------- routers/web/web.go | 3 +-- 3 files changed, 3 insertions(+), 29 deletions(-) diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go index 39a60df33f01..e6427ea4f4d2 100644 --- a/routers/api/v1/repo/file.go +++ b/routers/api/v1/repo/file.go @@ -18,6 +18,7 @@ import ( api "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/web" "code.gitea.io/gitea/routers/common" + "code.gitea.io/gitea/routers/web/repo" ) // GetRawFile get a file by path on a repository @@ -126,7 +127,7 @@ func GetArchive(ctx *context.APIContext) { ctx.Repo.GitRepo = gitRepo defer gitRepo.Close() - common.Download(ctx.Context) + repo.Download(ctx.Context) } // GetEditorconfig get editor config of a repository diff --git a/routers/common/repo.go b/routers/common/repo.go index c61b5ec57f52..22403da09765 100644 --- a/routers/common/repo.go +++ b/routers/common/repo.go @@ -7,7 +7,6 @@ package common import ( "fmt" "io" - "net/http" "path" "path/filepath" "strings" @@ -19,7 +18,6 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/typesniffer" - "code.gitea.io/gitea/services/archiver" ) // ServeBlob download a git.Blob @@ -41,30 +39,6 @@ func ServeBlob(ctx *context.Context, blob *git.Blob) error { return ServeData(ctx, ctx.Repo.TreePath, blob.Size(), dataRc) } -// Download an archive of a repository -func Download(ctx *context.Context) { - uri := ctx.Params("*") - aReq := archiver.DeriveRequestFrom(ctx, uri) - - if aReq == nil { - ctx.Error(http.StatusNotFound) - return - } - - downloadName := ctx.Repo.Repository.Name + "-" + aReq.GetArchiveName() - complete := aReq.IsComplete() - if !complete { - aReq = archiver.ArchiveRepository(aReq) - complete = aReq.WaitForCompletion(ctx) - } - - if complete { - ctx.ServeFile(aReq.GetArchivePath(), downloadName) - } else { - ctx.Error(http.StatusNotFound) - } -} - // ServeData download file from io.Reader func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) error { buf := make([]byte, 1024) diff --git a/routers/web/web.go b/routers/web/web.go index 2c8a6411a1d1..883213479c86 100644 --- a/routers/web/web.go +++ b/routers/web/web.go @@ -22,7 +22,6 @@ import ( "code.gitea.io/gitea/modules/validation" "code.gitea.io/gitea/modules/web" "code.gitea.io/gitea/routers/api/v1/misc" - "code.gitea.io/gitea/routers/common" "code.gitea.io/gitea/routers/web/admin" "code.gitea.io/gitea/routers/web/dev" "code.gitea.io/gitea/routers/web/events" @@ -888,7 +887,7 @@ func RegisterRoutes(m *web.Route) { }, context.RepoRef(), repo.MustBeNotEmpty, context.RequireRepoReaderOr(models.UnitTypeCode)) m.Group("/archive", func() { - m.Get("/*", common.Download) + m.Get("/*", repo.Download) m.Post("/*", repo.InitiateDownload) }, repo.MustBeNotEmpty, reqRepoCodeReader)