Skip to content

Commit

Permalink
Wait for namespace to terminate before restoring
Browse files Browse the repository at this point in the history
Fixes #691

Signed-off-by: Nolan Brubaker <nolan@heptio.com>
  • Loading branch information
Nolan Brubaker committed Sep 7, 2018
1 parent 5ccc27a commit da69881
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 10 deletions.
12 changes: 8 additions & 4 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ const (
)

type serverConfig struct {
pluginDir, metricsAddress, defaultBackupLocation string
backupSyncPeriod, podVolumeOperationTimeout time.Duration
restoreResourcePriorities []string
restoreOnly bool
pluginDir, metricsAddress, defaultBackupLocation string
backupSyncPeriod, podVolumeOperationTimeout, namespaceTimeout time.Duration
restoreResourcePriorities []string
restoreOnly bool
}

func NewCommand() *cobra.Command {
Expand All @@ -92,6 +92,7 @@ func NewCommand() *cobra.Command {
backupSyncPeriod: defaultBackupSyncPeriod,
podVolumeOperationTimeout: defaultPodVolumeOperationTimeout,
restoreResourcePriorities: defaultRestorePriorities,
namespaceTimeout: defaultNamespaceTimeout,
}
)

Expand Down Expand Up @@ -142,6 +143,7 @@ func NewCommand() *cobra.Command {
command.Flags().BoolVar(&config.restoreOnly, "restore-only", config.restoreOnly, "run in a mode where only restores are allowed; backups, schedules, and garbage-collection are all disabled")
command.Flags().StringSliceVar(&config.restoreResourcePriorities, "restore-resource-priorities", config.restoreResourcePriorities, "desired order of resource restores; any resource not in the list will be restored alphabetically after the prioritized resources")
command.Flags().StringVar(&config.defaultBackupLocation, "default-backup-storage-location", config.defaultBackupLocation, "name of the default backup storage location")
command.Flags().DurationVar(&config.namespaceTimeout, "namespace-timeout", config.namespaceTimeout, "duration to wait on namespace termination before failing a restore")

return command
}
Expand Down Expand Up @@ -416,6 +418,7 @@ func (s *server) loadConfig() (*api.Config, error) {
// Default durations used to populate serverConfig when the corresponding
// command-line flag is not supplied.
const (
// defaultBackupSyncPeriod is the initial value of serverConfig.backupSyncPeriod.
defaultBackupSyncPeriod = 60 * time.Minute
// defaultPodVolumeOperationTimeout is the initial value of
// serverConfig.podVolumeOperationTimeout.
defaultPodVolumeOperationTimeout = 60 * time.Minute
// defaultNamespaceTimeout is the default for the --namespace-timeout flag:
// how long to wait on namespace termination before failing a restore.
defaultNamespaceTimeout = 10 * time.Minute
)

// - Namespaces go first because all namespaced resources depend on them.
Expand Down Expand Up @@ -669,6 +672,7 @@ func (s *server) runControllers(config *api.Config, defaultBackupLocation *api.B
s.kubeClient.CoreV1().Namespaces(),
s.resticManager,
s.config.podVolumeOperationTimeout,
s.config.namespaceTimeout,
s.logger,
)
cmd.CheckError(err)
Expand Down
66 changes: 65 additions & 1 deletion pkg/restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ type kubernetesRestorer struct {
namespaceClient corev1.NamespaceInterface
resticRestorerFactory restic.RestorerFactory
resticTimeout time.Duration
namespaceTimeout time.Duration
resourcePriorities []string
fileSystem filesystem.Interface
logger logrus.FieldLogger
Expand Down Expand Up @@ -151,6 +152,7 @@ func NewKubernetesRestorer(
namespaceClient corev1.NamespaceInterface,
resticRestorerFactory restic.RestorerFactory,
resticTimeout time.Duration,
namespaceTimeout time.Duration,
logger logrus.FieldLogger,
) (Restorer, error) {
return &kubernetesRestorer{
Expand All @@ -161,6 +163,7 @@ func NewKubernetesRestorer(
namespaceClient: namespaceClient,
resticRestorerFactory: resticRestorerFactory,
resticTimeout: resticTimeout,
namespaceTimeout: namespaceTimeout,
resourcePriorities: resourcePriorities,
logger: logger,

Expand Down Expand Up @@ -242,6 +245,7 @@ func (kr *kubernetesRestorer) Restore(log logrus.FieldLogger, restore *api.Resto
resticRestorer: resticRestorer,
pvsToProvision: sets.NewString(),
pvRestorer: pvRestorer,
namespaceTimeout: kr.namespaceTimeout,
}

return restoreCtx.execute()
Expand Down Expand Up @@ -326,6 +330,7 @@ type context struct {
resourceWatches []watch.Interface
pvsToProvision sets.String
pvRestorer PVRestorer
namespaceTimeout time.Duration
}

func (ctx *context) infof(msg string, args ...interface{}) {
Expand Down Expand Up @@ -459,11 +464,34 @@ func (ctx *context) restoreFromDir(dir string) (api.RestoreResult, api.RestoreRe
if !existingNamespaces.Has(mappedNsName) {
logger := ctx.logger.WithField("namespace", nsName)
ns := getNamespace(logger, filepath.Join(dir, api.ResourcesDir, "namespaces", api.ClusterScopedDir, nsName+".json"), mappedNsName)
if _, err := kube.EnsureNamespaceExists(ns, ctx.namespaceClient); err != nil {
nsIsReady, err := kube.EnsureNamespaceExistsAndIsReady(ns, ctx.namespaceClient)
if err != nil {
addArkError(&errs, err)
continue
}

if !nsIsReady {
namespaceWatch, err := ctx.namespaceClient.Watch(metav1.ListOptions{})
if err != nil {
// TODO: make this error more helpful
addArkError(&errs, err)
continue
}
// TODO: maybe make these their own things vs using the resources
ctx.resourceWatches = append(ctx.resourceWatches, namespaceWatch)
ctx.resourceWaitGroup.Add(1)
go func() {
defer ctx.resourceWaitGroup.Done()

if _, err := waitForNamespaceDelete(namespaceWatch.ResultChan(), mappedNsName, ctx.namespaceTimeout, ctx.logger); err != nil {
// TODO: Abandon the restore and mark it as failed at this point, the timeout is passed
ctx.logger.Warnf("Timeout reached waiting for namespace %s to terminate", ns)
addArkError(&warnings, fmt.Errorf("timeout reached waiting for namespace %s to terminate", mappedNsName))
return
}
}()
}

// keep track of namespaces that we know exist so we don't
// have to try to create them multiple times
existingNamespaces.Insert(mappedNsName)
Expand Down Expand Up @@ -894,6 +922,42 @@ func waitForReady(
}
}

// waitForNamespaceDelete blocks until a Deleted event for the namespace called
// name arrives on watchChan, the timeout elapses, or watchChan is closed.
//
// Parameters:
//   - watchChan: event stream from a namespace watch.
//   - name: name of the namespace whose deletion is awaited.
//   - timeout: maximum time to wait; zero means wait indefinitely.
//   - log: logger used to report events carrying an unexpected object type.
//
// It returns (true, nil) once the deletion is observed. It returns
// (false, err) if the timeout expires first or if watchChan is closed before
// the deletion is seen.
func waitForNamespaceDelete(
	watchChan <-chan watch.Event,
	name string,
	timeout time.Duration,
	log logrus.FieldLogger,
) (bool, error) {
	// A nil channel never becomes ready in a select, so leaving timeoutChan
	// nil gives "no timeout" without allocating a dummy channel.
	var timeoutChan <-chan time.Time
	if timeout != 0 {
		timer := time.NewTimer(timeout)
		// Release the timer as soon as we return instead of letting it run
		// for the full timeout.
		defer timer.Stop()
		timeoutChan = timer.C
	}

	for {
		select {
		case event, open := <-watchChan:
			if !open {
				// A closed channel yields zero-value events forever; without
				// this check the loop would spin until the timeout (or
				// indefinitely when no timeout is set).
				return false, errors.New("watch channel closed before namespace termination was observed")
			}
			if event.Type != watch.Deleted {
				continue
			}

			obj, ok := event.Object.(*v1.Namespace)
			if !ok {
				log.Errorf("Unexpected type %T", event.Object)
				continue
			}
			// The watch may report other namespaces; only the one we are
			// waiting on counts.
			if obj.Name != name {
				continue
			}
			return true, nil
		case <-timeoutChan:
			return false, errors.New("failed to observe namespace terminate within the timeout")
		}
	}
}

type PVRestorer interface {
executePVAction(obj *unstructured.Unstructured) (*unstructured.Unstructured, error)
}
Expand Down
18 changes: 13 additions & 5 deletions pkg/util/kube/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,23 @@ func NamespaceAndName(objMeta metav1.Object) string {
return fmt.Sprintf("%s/%s", objMeta.GetNamespace(), objMeta.GetName())
}

// EnsureNamespaceExists attempts to create the provided Kubernetes namespace. It returns two values:
// a bool indicating whether or not the namespace was created, and an error if the create failed
// EnsureNamespaceExistsAndIsReady attempts to create the provided Kubernetes namespace. It returns two values:
// a bool indicating whether or not the namespace is ready, and an error if the create failed
// for a reason other than that the namespace already exists. Note that in the case where the
// namespace already exists, this function will return (false, nil).
func EnsureNamespaceExists(namespace *corev1api.Namespace, client corev1client.NamespaceInterface) (bool, error) {
// namespace already exists and is not ready, this function will return (false, nil).
func EnsureNamespaceExistsAndIsReady(namespace *corev1api.Namespace, client corev1client.NamespaceInterface) (bool, error) {
if _, err := client.Create(namespace); err == nil {
return true, nil
} else if apierrors.IsAlreadyExists(err) {
return false, nil
// Do a follow up Get because Create returns an uninitialized namespace object, not the one that exists.
ns, err := client.Get(namespace.Name, metav1.GetOptions{})
if err != nil {
return false, errors.Wrapf(err, "error getting namespace %s", namespace.Name)
}
if ns.Status.Phase == corev1api.NamespaceTerminating {
return false, nil
}
return true, nil
} else {
return false, errors.Wrapf(err, "error creating namespace %s", namespace.Name)
}
Expand Down

0 comments on commit da69881

Please sign in to comment.