From e3b92f5556128707f69b20d635ceb96fc8553120 Mon Sep 17 00:00:00 2001
From: Ming Qiu
Date: Thu, 1 Feb 2024 13:27:59 +0800
Subject: [PATCH] Add design for repository maintenance job

Signed-off-by: Ming Qiu
---
 design/repository-maintenance.md | 236 +++++++++++++++++++++++++++++++
 1 file changed, 236 insertions(+)
 create mode 100644 design/repository-maintenance.md

# Design for repository maintenance job

## Abstract
This design proposal aims to decouple repository maintenance from the Velero server by launching a maintenance job when needed, to mitigate the impact on the Velero server during filesystem-level backups.

## Background
During filesystem-level backups, Velero performs periodic maintenance on the repository. In some cases this operation consumes significant CPU and memory, which can lead to the Velero server being OOM-killed. This proposal addresses these challenges by separating repository maintenance from the Velero server.

## Goals
1. **Independent Repository Maintenance**: Decouple maintenance from Velero's main logic to reduce the impact on the Velero server pod.

2. **Configurable Resource Usage**: Make the resources used by the maintenance job configurable.

3. **No API Changes**: Retain the existing APIs and workflow in the backup repository controller.

## Non Goals
Parallel maintenance raises several concerns that would add considerable complexity to the current design:

 - Non-blocking maintenance jobs: parallel maintenance may cause conflicting updates to the same `backuprepositories` CR.

 - Maintenance job concurrency control: Kubernetes provides no suitable mechanism to control the concurrency of different jobs.
 - Parallel maintenance: maintaining the same repo with multiple jobs at the same time raises compatibility issues that some providers may not support.

Because of the concerns above, parallel maintenance is not a priority for now, and improving maintenance efficiency is not the primary focus at this stage.

## High-Level Design
1. **Add Maintenance CLI Command**: Introduce a new Velero CLI command for repository maintenance.

2. **Create Jobs by Repository Manager**: Modify the backup repository controller to create a maintenance job instead of directly invoking the chain of calls that performs Kopia or Restic maintenance.

3. **Update Maintenance Job Result in BackupRepository CR**: Retrieve the result of the maintenance job and update the status of the `BackupRepository` CR accordingly.

4. **Add Settings for Maintenance Jobs**: Introduce configuration options for maintenance jobs, including resource requests and limits (CPU and memory).

5. **Clean Up Maintenance Jobs**: Introduce a configuration option that determines the number of failed maintenance jobs to retain for debugging purposes.


## Detailed Design

### 1. Add Maintenance CLI Command

The following command will be added to the Velero CLI:

```shell
$ velero maintenance --repo-name $repo-name --repo-type $repo-type --backup-storage-location $bsl
```

The main maintenance logic uses the repository provider to perform the maintenance:
```golang
func (o *Options) Run(f veleroCli.Factory) {
	...
	credentialFileStore, err := credentials.NewNamespacedFileStore(
		cli,
		f.Namespace(),
		"/tmp/credentials",
		filesystem.NewFileSystem(),
	)
	cmd.CheckError(err)
	credentialSecretStore, err := credentials.NewNamespacedSecretStore(cli, f.Namespace())
	cmd.CheckError(err)

	// Initialize the repo provider
	repoProvider := provider.NewUnifiedRepoProvider(
		credentials.CredentialGetter{
			FromFile:   credentialFileStore,
			FromSecret: credentialSecretStore,
		}, o.RepoType, cli, logrus.New())

	// Get the BackupRepository CR
	repo, err := repository.GetBackupRepository(context.Background(), cli, f.Namespace(),
		repository.BackupRepositoryKey{
			VolumeNamespace: o.VolumeNamespace,
			BackupLocation:  o.BackupStorageLocation,
			RepositoryType:  o.RepoType,
		}, true)
	cmd.CheckError(err)

	// Get the BSL
	bsl := &velerov1api.BackupStorageLocation{}
	err = cli.Get(context.Background(), client.ObjectKey{Namespace: f.Namespace(), Name: repo.Spec.BackupStorageLocation}, bsl)
	cmd.CheckError(err)

	para := provider.RepoParam{
		BackupRepo:     repo,
		BackupLocation: bsl,
	}

	// Connect to the repository
	err = repoProvider.BoostRepoConnect(context.Background(), para)
	cmd.CheckError(err)
	// Prune the repository
	err = repoProvider.PruneRepo(context.Background(), para)
	cmd.CheckError(err)
	...
}
```

### 2. Create Jobs by Repository Manager
Currently, the backup repository controller calls the repository manager's `PruneRepo`, which in turn triggers Kopia or Restic maintenance through a chain of calls.

We will keep the `PruneRepo` function in the repository manager, but cut off the chain of calls by creating a maintenance job instead:

```golang
func (m *manager) PruneRepo(repo *velerov1api.BackupRepository) error {
	...
	// Write the maintenance result into the default terminationMessagePath
	args := []string{
		"-c",
		"velero maintenance --repo-name=" + param.BackupRepo.Spec.VolumeNamespace + " --repo-type=" + param.BackupRepo.Spec.RepositoryType + " --backup-storage-location=" + param.BackupLocation.Name + " > /dev/termination-log",
	}

	env := &v1.EnvVar{Name: ProviderFile, Value: "/credentials/cloud"}
	job := builder.ForJob(param.BackupRepo.Name, param.BackupRepo.Namespace, "velero/velero:maintenance", args).Env(env).
		VolumeMounts(*builder.ForVolumeMount("cloud-credentials", "/credentials").Result()).
		ServiceAccount("velero").Resources(&resources).
		Volumes(*builder.ForVolume("cloud-credentials").SecretSource("cloud-credentials").Result()).Result()
	...
	// The maintenance job also inherits some other field values (environment variables, service account, image, etc.) from the Velero server pod, in keeping with the server pod's settings
	log.Debugf("Creating maintenance job: %v", job)
	defer cancel()
	if err := m.client.Create(pruneTimeOut, job); err != nil {
		return errors.Wrap(err, "error creating maintenance job")
	}

	if err := builder.WaitForJobComplete(pruneTimeOut, m.client, job); err != nil {
		return errors.Wrap(err, "error waiting for maintenance job to complete")
	}
	...
	return nil
}
```

Now, the backup repository controller calls the repository manager to create one maintenance job and waits for it to complete; the chain of calls that performs the Kopia or Restic maintenance is executed by the job.

### 3. Update the Result of the Maintenance Job into BackupRepository CR

The backup repository controller will update the result of the maintenance job in the `BackupRepository` CR.

For how to get the result of the maintenance job, see the Kubernetes documentation on [writing and reading a termination message](https://kubernetes.io/docs/tasks/debug/debug-application/determine-reason-pod-failure/#writing-and-reading-a-termination-message).
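One practical caveat with this mechanism: the kubelet caps a container's termination message at 4096 bytes, so a long maintenance error could be cut off. A minimal, self-contained sketch of defensively truncating the result before writing it; the helper name and the tail-keeping policy are illustrative assumptions, not part of this proposal:

```golang
package main

import "fmt"

// maxTerminationMessageLen mirrors the kubelet's 4096-byte cap on a
// container's termination message; anything longer gets truncated.
const maxTerminationMessageLen = 4096

// truncateForTerminationLog keeps the tail of the maintenance output,
// since the final error line is usually the most useful part. (Hypothetical
// helper for illustration only.)
func truncateForTerminationLog(result string) string {
	if len(result) <= maxTerminationMessageLen {
		return result
	}
	return result[len(result)-maxTerminationMessageLen:]
}

func main() {
	long := string(make([]byte, 10000))
	fmt.Println(len(truncateForTerminationLog(long))) // prints 4096
	fmt.Println(truncateForTerminationLog("repo maintenance completed"))
}
```

Keeping the tail rather than the head is a deliberate choice here: when maintenance fails, the last lines of output typically carry the actual error.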
After the maintenance job is finished, we could get the result of maintenance by reading the termination message from the related pod:

```golang
func GetContainerTerminatedMessage(pod *v1.Pod) string {
	...
	for _, containerStatus := range pod.Status.ContainerStatuses {
		// The termination message is recorded in the container's terminated state
		if containerStatus.State.Terminated != nil {
			return containerStatus.State.Terminated.Message
		}
	}
	...
	return ""
}
```
Then we could update the status of the `BackupRepository` CR with the message.

### 4. Add Setting for Maintenance Job Timeout
Add one configuration option for setting the maintenance job timeout as below:
```shell
velero server --maintenance-job-timeout $maintenance-job-timeout
```
The default value is 0, which means the timeout duration is not limited.

### 5. Add Setting for Resource Usage of Maintenance
Add one configuration option for setting the resource requests and limits of maintenance jobs as below:
```shell
velero server --maintenance-job-cpu-request $cpu-request --maintenance-job-mem-request $mem-request --maintenance-job-cpu-limit $cpu-limit --maintenance-job-mem-limit $mem-limit
```
The default value is 0, which means the resources are not limited.

### 6. Clean Up Maintenance Jobs
Add one configuration option for the number of latest failed maintenance jobs to keep, as below:
```shell
velero server --keep-failed-maintenance-jobs $num
```

We would clean up all completed jobs and keep the latest N failed jobs at the end of each round of reconciliation.
```golang
func clearFinishedJob(ctx context.Context, c client.Client, retainFailedJobs int) error {
	jobList := batch.JobList{}
	if err := c.List(ctx, &jobList); err != nil {
		return errors.Wrap(err, "error listing jobs")
	}

	// Collect failed jobs and delete completed ones; failed jobs must not be
	// deleted in this loop, otherwise nothing would be left to retain below
	failedJobs := []batch.Job{}
	for i := range jobList.Items {
		job := &jobList.Items[i]
		for _, condition := range job.Status.Conditions {
			if condition.Status != v1.ConditionTrue {
				continue
			}
			if condition.Type == batch.JobFailed {
				failedJobs = append(failedJobs, *job)
			} else if condition.Type == batch.JobComplete {
				if err := c.Delete(ctx, job); err != nil {
					return errors.Wrap(err, "error deleting job")
				}
			}
		}
	}

	// Failed jobs carry no CompletionTime, so sort by StartTime, newest first
	sort.Slice(failedJobs, func(i, j int) bool {
		return failedJobs[i].Status.StartTime.Time.After(failedJobs[j].Status.StartTime.Time)
	})

	// Keep the newest retainFailedJobs failed jobs and delete the rest
	for i := retainFailedJobs; i < len(failedJobs); i++ {
		if err := c.Delete(ctx, &failedJobs[i]); err != nil {
			return errors.Wrap(err, "error deleting job")
		}
	}
	return nil
}
```

Roughly, the process is as follows:
1. The backup repository controller periodically checks the BackupRepository requests in its queue.

2. If `runMaintenanceIfDue` in `Reconcile` determines that the repository's maintenance period is due, the backup repository controller calls the repository manager to execute `PruneRepo`.

3. The repository manager's `PruneRepo` creates one maintenance job; the job's resource usage follows the settings of the Velero server, and its environment variables, service account, image, etc. follow the Velero server pod.

4. The maintenance job executes the Velero maintenance command and writes the result into the terminationMessagePath of the related pod.

5. After the job finishes, the backup repository controller reads the job's termination message and updates the message and phase fields in the status of the `backuprepositories` CR accordingly.

6. The backup repository controller cleans up the finished jobs as described above.
## Prospects for Future Work
Future work may focus on improving the efficiency of Velero maintenance through non-blocking, parallel modes. Potential areas for enhancement include:

**Non-blocking Mode**: Explore the implementation of a non-blocking mode for parallel maintenance to enhance overall efficiency.

**Concurrency Control**: Investigate mechanisms for better concurrency control of different maintenance jobs.

**Provider Support for Parallel Maintenance**: Evaluate the feasibility of parallel maintenance for different providers and address any compatibility issues.

**Efficiency Improvements**: Investigate strategies to optimize maintenance efficiency without compromising reliability.

By considering these areas, future iterations of Velero may benefit from enhanced parallelization and improved resource utilization during repository maintenance.