Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

periodically check for stale restic repo locks #1708

Merged
merged 4 commits into from
Jul 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelogs/unreleased/1708-skriss
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
remove any stale locks from restic repositories every 5m
34 changes: 27 additions & 7 deletions pkg/controller/restic_repository_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package controller

import (
"encoding/json"
"strings"
"time"

jsonpatch "github.com/evanphx/json-patch"
Expand Down Expand Up @@ -120,9 +121,19 @@ func (c *resticRepositoryController) processQueueItem(key string) error {
// Don't mutate the shared cache
reqCopy := req.DeepCopy()

switch req.Status.Phase {
case "", v1.ResticRepositoryPhaseNew:
if req.Status.Phase == "" || req.Status.Phase == v1.ResticRepositoryPhaseNew {
return c.initializeRepo(reqCopy, log)
}

// If the repository is ready or not-ready, check it for stale locks, but if
// this fails for any reason, it's non-critical so we still continue on to the
// rest of the "process" logic.
log.Debug("Checking repository for stale locks")
if err := c.repositoryManager.UnlockRepo(reqCopy); err != nil {
log.WithError(err).Error("Error checking repository for stale locks")
}

switch req.Status.Phase {
case v1.ResticRepositoryPhaseReady:
return c.runMaintenanceIfDue(reqCopy, log)
case v1.ResticRepositoryPhaseNotReady:
Expand Down Expand Up @@ -162,14 +173,23 @@ func (c *resticRepositoryController) initializeRepo(req *v1.ResticRepository, lo
})
}

// ensureRepo first tries to connect to the repo, and returns if it succeeds. If it fails,
// it attempts to init the repo, and returns the result.
// ensureRepo checks to see if a repository exists, and attempts to initialize it if
// it does not exist. An error is returned if the repository can't be connected to
// or initialized.
func ensureRepo(repo *v1.ResticRepository, repoManager restic.RepositoryManager) error {
if repoManager.ConnectToRepo(repo) == nil {
return nil
if err := repoManager.ConnectToRepo(repo); err != nil {
// If the repository has not yet been initialized, the error message will always include
// the following string. This is the only scenario where we should try to initialize it.
// Other errors (e.g. "already locked") should be returned as-is since the repository
// does already exist, but it can't be connected to.
if strings.Contains(err.Error(), "Is there a repository at the following location?") {
return repoManager.InitRepo(repo)
}

return err
}

return repoManager.InitRepo(repo)
return nil
}

func (c *resticRepositoryController) runMaintenanceIfDue(req *v1.ResticRepository, log logrus.FieldLogger) error {
Expand Down
9 changes: 8 additions & 1 deletion pkg/restic/command_factory.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright 2018 the Velero contributors.
Copyright 2018, 2019 the Velero contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -112,3 +112,10 @@ func ForgetCommand(repoIdentifier, snapshotID string) *Command {
Args: []string{snapshotID},
}
}

func UnlockCommand(repoIdentifier string) *Command {
return &Command{
Command: "unlock",
RepoIdentifier: repoIdentifier,
}
}
13 changes: 12 additions & 1 deletion pkg/restic/repository_manager.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright 2018 the Velero contributors.
Copyright 2018, 2019 the Velero contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -55,6 +55,9 @@ type RepositoryManager interface {
// PruneRepo deletes unused data from a repo.
PruneRepo(repo *velerov1api.ResticRepository) error

// UnlockRepo removes stale locks from a repo.
UnlockRepo(repo *velerov1api.ResticRepository) error

// Forget removes a snapshot from the list of
// available snapshots in a repo.
Forget(context.Context, SnapshotIdentifier) error
Expand Down Expand Up @@ -213,6 +216,14 @@ func (rm *repositoryManager) PruneRepo(repo *velerov1api.ResticRepository) error
return rm.exec(PruneCommand(repo.Spec.ResticIdentifier), repo.Spec.BackupStorageLocation)
}

func (rm *repositoryManager) UnlockRepo(repo *velerov1api.ResticRepository) error {
// restic unlock requires a non-exclusive lock
rm.repoLocker.Lock(repo.Name)
defer rm.repoLocker.Unlock(repo.Name)

return rm.exec(UnlockCommand(repo.Spec.ResticIdentifier), repo.Spec.BackupStorageLocation)
}

func (rm *repositoryManager) Forget(ctx context.Context, snapshot SnapshotIdentifier) error {
// We can't wait for this in the constructor, because this informer is coming
// from the shared informer factory, which isn't started until *after* the repo
Expand Down