Skip to content

Commit

Permalink
Add maintenance job
Browse files Browse the repository at this point in the history
Signed-off-by: Ming Qiu <mqiu@vmware.com>
  • Loading branch information
qiuming-best committed Feb 29, 2024
1 parent 2a1ae0e commit f6bb743
Show file tree
Hide file tree
Showing 18 changed files with 1,633 additions and 33 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/7451-qiuming-best
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add repository maintenance job
22 changes: 22 additions & 0 deletions pkg/cmd/cli/install/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ import (
"strings"
"time"

"github.com/sirupsen/logrus"

"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/uploader"

"github.com/pkg/errors"
Expand All @@ -38,6 +41,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/cmd/util/output"
"github.com/vmware-tanzu/velero/pkg/install"
kubeutil "github.com/vmware-tanzu/velero/pkg/util/kube"
"github.com/vmware-tanzu/velero/pkg/util/logging"
)

// Options collects all the options for installing Velero into a Kubernetes cluster.
Expand Down Expand Up @@ -84,6 +88,9 @@ type Options struct {
DefaultSnapshotMoveData bool
DisableInformerCache bool
ScheduleSkipImmediately bool
FormatFlag *logging.FormatFlag
LogLevelFlag *logging.LevelFlag
MaintenanceCfg repository.MaintenanceConfig
}

// BindFlags adds command line values to the options struct.
Expand Down Expand Up @@ -128,6 +135,13 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) {
flags.BoolVar(&o.DefaultSnapshotMoveData, "default-snapshot-move-data", o.DefaultSnapshotMoveData, "Bool flag to configure Velero server to move data by default for all snapshots supporting data movement. Optional.")
flags.BoolVar(&o.DisableInformerCache, "disable-informer-cache", o.DisableInformerCache, "Disable informer cache for Get calls on restore. With this enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (don't disable). Optional.")
flags.BoolVar(&o.ScheduleSkipImmediately, "schedule-skip-immediately", o.ScheduleSkipImmediately, "Skip the first scheduled backup immediately after creating a schedule. Default is false (don't skip).")
flags.Var(o.FormatFlag, "log-format", fmt.Sprintf("The format for log output. Valid values are %s.", strings.Join(o.FormatFlag.AllowedValues(), ", ")))
flags.Var(o.LogLevelFlag, "log-level", fmt.Sprintf("The level at which to log. Valid values are %s.", strings.Join(o.LogLevelFlag.AllowedValues(), ", ")))
flags.IntVar(&o.MaintenanceCfg.KeepLatestMaitenanceJobs, "keep-latest-maintenance-jobs", o.MaintenanceCfg.KeepLatestMaitenanceJobs, "Number of latest maintenance jobs to keep each repository. Optional.")
flags.StringVar(&o.MaintenanceCfg.CPURequest, "maintenance-job-cpu-request", o.MaintenanceCfg.CPURequest, "CPU request for maintenance jobs. Default is no limit.")
flags.StringVar(&o.MaintenanceCfg.MemRequest, "maintenance-job-mem-request", o.MaintenanceCfg.MemRequest, "Memory request for maintenance jobs. Default is no limit.")
flags.StringVar(&o.MaintenanceCfg.CPULimit, "maintenance-job-cpu-limit", o.MaintenanceCfg.CPULimit, "CPU limit for maintenance jobs. Default is no limit.")
flags.StringVar(&o.MaintenanceCfg.MemLimit, "maintenance-job-mem-limit", o.MaintenanceCfg.MemLimit, "Memory limit for maintenance jobs. Default is no limit.")
}

// NewInstallOptions instantiates a new, default InstallOptions struct.
Expand Down Expand Up @@ -157,6 +171,11 @@ func NewInstallOptions() *Options {
DefaultSnapshotMoveData: false,
DisableInformerCache: false,
ScheduleSkipImmediately: false,
FormatFlag: logging.NewFormatFlag(),
LogLevelFlag: logging.LogLevelFlag(logrus.InfoLevel),
MaintenanceCfg: repository.MaintenanceConfig{
KeepLatestMaitenanceJobs: repository.DefaultKeepLatestMaitenanceJobs,
},
}
}

Expand Down Expand Up @@ -224,6 +243,9 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) {
DefaultSnapshotMoveData: o.DefaultSnapshotMoveData,
DisableInformerCache: o.DisableInformerCache,
ScheduleSkipImmediately: o.ScheduleSkipImmediately,
FormatFlag: o.FormatFlag,
LogLevelFlag: o.LogLevelFlag,
MaintenanceCfg: o.MaintenanceCfg,
}, nil
}

Expand Down
164 changes: 164 additions & 0 deletions pkg/cmd/cli/repomantenance/maintenance.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
package repomantenance

import (
"context"
"fmt"
"os"
"strings"

"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/vmware-tanzu/velero/internal/credentials"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerocli "github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/repository/provider"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/logging"
)

// Options holds the command-line settings of the internal repository
// maintenance command created by NewCommand.
type Options struct {
// RepoName is bound to --repo-name; it is passed as the VolumeNamespace
// when the backup repository is looked up.
RepoName string
// BackupStorageLocation is bound to --backup-storage-location; it is passed
// as the BackupLocation when the backup repository is looked up.
BackupStorageLocation string
// RepoType is bound to --repo-type and selects which repository provider
// is used for pruning.
RepoType string
// KeepLatestMaitenanceJobs is not bound to any flag in BindFlags.
// NOTE(review): no reader of this field is visible here — confirm it is needed.
KeepLatestMaitenanceJobs int
// LogLevelFlag is bound to --log-level and is parsed when the logger is built.
LogLevelFlag *logging.LevelFlag
// FormatFlag is bound to --log-format and is parsed when the logger is built.
FormatFlag *logging.FormatFlag
}

// BindFlags registers the repository-maintenance flags on the given flag set
// and ties each of them to the matching field of o.
func (o *Options) BindFlags(flags *pflag.FlagSet) {
	// Build the help texts up front so the registrations below stay short.
	logLevelHelp := fmt.Sprintf(
		"The level at which to log. Valid values are %s.",
		strings.Join(o.LogLevelFlag.AllowedValues(), ", "),
	)
	logFormatHelp := fmt.Sprintf(
		"The format for log output. Valid values are %s.",
		strings.Join(o.FormatFlag.AllowedValues(), ", "),
	)

	// Repository selection.
	flags.StringVar(&o.RepoName, "repo-name", "", "namespace of the pod/volume that the snapshot is for")
	flags.StringVar(&o.BackupStorageLocation, "backup-storage-location", "", "backup's storage location name")
	flags.StringVar(&o.RepoType, "repo-type", velerov1api.BackupRepositoryTypeKopia, "type of the repository where the snapshot is stored")

	// Logging.
	flags.Var(o.LogLevelFlag, "log-level", logLevelHelp)
	flags.Var(o.FormatFlag, "log-format", logFormatHelp)
}

// NewCommand builds the hidden, internal-only cobra command that runs
// repository maintenance. Logging defaults to the info level and the default
// log format; both can be overridden through the flags bound by BindFlags.
func NewCommand(f velerocli.Factory) *cobra.Command {
	opts := new(Options)
	opts.LogLevelFlag = logging.LogLevelFlag(logrus.InfoLevel)
	opts.FormatFlag = logging.NewFormatFlag()

	command := &cobra.Command{
		// NOTE(review): the spelling "mantenance" is kept exactly as-is; whatever
		// launches this command must use the same string — confirm before renaming.
		Use:    "repo-mantenance",
		Hidden: true,
		Short:  "VELERO INTERNAL COMMAND ONLY - not intended to be run directly by users",
	}
	// The cobra arguments are unused: everything the command needs comes from flags.
	command.Run = func(_ *cobra.Command, _ []string) {
		opts.Run(f)
	}

	opts.BindFlags(command.Flags())
	return command
}

// checkError terminates the process when err is a real failure.
//
// A nil error, or an error that is (or wraps) context.Canceled, is ignored and
// the function returns normally. errors.Is is used rather than a direct
// comparison because errors produced by runRepoPrune are wrapped with
// errors.Wrap, so an identity check against context.Canceled would never match.
//
// For any other error the message is written to file (the termination log),
// the file is closed, and the process exits with status 1. If writing to file
// fails, that failure is reported on stderr before exiting.
func checkError(err error, file *os.File) {
	if err == nil || errors.Is(err, context.Canceled) {
		return
	}

	if _, errWrite := fmt.Fprintf(file, "An error occurred: %v \n", err); errWrite != nil {
		fmt.Fprintf(os.Stderr, "Failed to write error to termination log file: %v\n", errWrite)
	}
	// os.Exit skips deferred calls, so close the file explicitly first.
	file.Close()
	os.Exit(1) // indicate the command executed failed
}

// Run executes repository maintenance for the repository described by o.
//
// It builds a logger from the parsed log-level and log-format flags (writing
// to stdout), opens the termination log file, creates a Kubernetes client
// whose scheme knows the Velero v1 and core v1 types, and then prunes the
// repository. Any failure along the way is handed to checkError, which records
// it in the termination log and exits the process with a non-zero status.
func (o *Options) Run(f velerocli.Factory) {
	logger := logging.DefaultLogger(o.LogLevelFlag.Parse(), o.FormatFlag.Parse())
	logger.SetOutput(os.Stdout)

	errorFile, err := os.Create("/dev/termination-log")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to create termination log file: %v\n", err)
		// Exit non-zero: returning here would end the process with status 0
		// although no maintenance was performed.
		os.Exit(1)
	}
	defer errorFile.Close()

	// Register the API types the client needs to read.
	scheme := runtime.NewScheme()
	err = velerov1api.AddToScheme(scheme)
	checkError(err, errorFile)

	err = v1.AddToScheme(scheme)
	checkError(err, errorFile)

	config, err := f.ClientConfig()
	checkError(err, errorFile)

	cli, err := client.New(config, client.Options{
		Scheme: scheme,
	})
	checkError(err, errorFile)

	err = o.runRepoPrune(cli, f.Namespace(), logger)
	checkError(err, errorFile)
}

func (o *Options) runRepoPrune(cli client.Client, namesapce string, logger logrus.FieldLogger) error {

Check failure on line 103 in pkg/cmd/cli/repomantenance/maintenance.go

View workflow job for this annotation

GitHub Actions / Run Codespell

namesapce ==> namespace
credentialFileStore, err := credentials.NewNamespacedFileStore(
cli,
namesapce,

Check failure on line 106 in pkg/cmd/cli/repomantenance/maintenance.go

View workflow job for this annotation

GitHub Actions / Run Codespell

namesapce ==> namespace
"/tmp/credentials",
filesystem.NewFileSystem(),
)
if err != nil {
return errors.Wrap(err, "failed to create namespaced file store")
}

credentialSecretStore, err := credentials.NewNamespacedSecretStore(cli, f.Namespace())

Check failure on line 114 in pkg/cmd/cli/repomantenance/maintenance.go

View workflow job for this annotation

GitHub Actions / Run CI

undefined: f

Check failure on line 114 in pkg/cmd/cli/repomantenance/maintenance.go

View workflow job for this annotation

GitHub Actions / build

undefined: f
if err != nil {
return errors.Wrap(err, "failed to create namespaced secret store")
}

var repoProvider provider.Provider
if o.RepoType == velerov1api.BackupRepositoryTypeRestic {
repoProvider = provider.NewResticRepositoryProvider(credentialFileStore, filesystem.NewFileSystem(), logger)
} else {
repoProvider = provider.NewUnifiedRepoProvider(
credentials.CredentialGetter{
FromFile: credentialFileStore,
FromSecret: credentialSecretStore,
}, o.RepoType, cli, logger)
}

// backupRepository
repo, err := repository.GetBackupRepository(context.Background(), cli, f.Namespace(),

Check failure on line 131 in pkg/cmd/cli/repomantenance/maintenance.go

View workflow job for this annotation

GitHub Actions / Run CI

undefined: f

Check failure on line 131 in pkg/cmd/cli/repomantenance/maintenance.go

View workflow job for this annotation

GitHub Actions / build

undefined: f
repository.BackupRepositoryKey{
VolumeNamespace: o.RepoName,
BackupLocation: o.BackupStorageLocation,
RepositoryType: o.RepoType,
}, true)

if err != nil {
return errors.Wrap(err, "failed to get backup repository")
}

// bsl
bsl := &velerov1api.BackupStorageLocation{}
err = cli.Get(context.Background(), client.ObjectKey{Namespace: namesapce, Name: repo.Spec.BackupStorageLocation}, bsl)

Check failure on line 144 in pkg/cmd/cli/repomantenance/maintenance.go

View workflow job for this annotation

GitHub Actions / Run Codespell

namesapce ==> namespace
if err != nil {
return errors.Wrap(err, "failed to get backup storage location")
}

para := provider.RepoParam{
BackupRepo: repo,
BackupLocation: bsl,
}

err = repoProvider.BoostRepoConnect(context.Background(), para)
if err != nil {
return errors.Wrap(err, "failed to boost repo connect")
}

err = repoProvider.PruneRepo(context.Background(), para)
if err != nil {
return errors.Wrap(err, "failed to prune repo")
}
return nil
}
26 changes: 24 additions & 2 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
appsv1 "k8s.io/api/apps/v1"
batchv1api "k8s.io/api/batch/v1"
corev1api "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -135,6 +137,7 @@ type serverConfig struct {
defaultSnapshotMoveData bool
disableInformerCache bool
scheduleSkipImmediately bool
maintenanceCfg repository.MaintenanceConfig
}

func NewCommand(f client.Factory) *cobra.Command {
Expand Down Expand Up @@ -166,6 +169,9 @@ func NewCommand(f client.Factory) *cobra.Command {
defaultSnapshotMoveData: false,
disableInformerCache: defaultDisableInformerCache,
scheduleSkipImmediately: false,
maintenanceCfg: repository.MaintenanceConfig{
KeepLatestMaitenanceJobs: repository.DefaultKeepLatestMaitenanceJobs,
},
}
)

Expand Down Expand Up @@ -239,7 +245,15 @@ func NewCommand(f client.Factory) *cobra.Command {
command.Flags().BoolVar(&config.defaultSnapshotMoveData, "default-snapshot-move-data", config.defaultSnapshotMoveData, "Move data by default for all snapshots supporting data movement.")
command.Flags().BoolVar(&config.disableInformerCache, "disable-informer-cache", config.disableInformerCache, "Disable informer cache for Get calls on restore. With this enabled, it will speed up restore in cases where there are backup resources which already exist in the cluster, but for very large clusters this will increase velero memory usage. Default is false (don't disable).")
command.Flags().BoolVar(&config.scheduleSkipImmediately, "schedule-skip-immediately", config.scheduleSkipImmediately, "Skip the first scheduled backup immediately after creating a schedule. Default is false (don't skip).")

command.Flags().IntVar(&config.maintenanceCfg.KeepLatestMaitenanceJobs, "keep-latest-maintenance-jobs", config.maintenanceCfg.KeepLatestMaitenanceJobs, "Number of latest maintenance jobs to keep each repository. Optional.")
command.Flags().StringVar(&config.maintenanceCfg.CPURequest, "maintenance-job-cpu-request", config.maintenanceCfg.CPURequest, "CPU request for maintenance job. Default is no limit.")
command.Flags().StringVar(&config.maintenanceCfg.MemRequest, "maintenance-job-mem-request", config.maintenanceCfg.MemRequest, "Memory request for maintenance job. Default is no limit.")
command.Flags().StringVar(&config.maintenanceCfg.CPULimit, "maintenance-job-cpu-limit", config.maintenanceCfg.CPULimit, "CPU limit for maintenance job. Default is no limit.")
command.Flags().StringVar(&config.maintenanceCfg.MemLimit, "maintenance-job-mem-limit", config.maintenanceCfg.MemLimit, "Memory limit for maintenance job. Default is no limit.")

// The maintenance job inherits its log format and log level from the Velero server.
config.maintenanceCfg.FormatFlag = config.formatFlag
config.maintenanceCfg.LogLevelFlag = logLevelFlag
return command
}

Expand Down Expand Up @@ -346,6 +360,14 @@ func newServer(f client.Factory, config serverConfig, logger *logrus.Logger) (*s
cancelFunc()
return nil, err
}
if err := batchv1api.AddToScheme(scheme); err != nil {
cancelFunc()
return nil, err
}
if err := appsv1.AddToScheme(scheme); err != nil {
cancelFunc()
return nil, err
}

ctrl.SetLogger(logrusr.New(logger))

Expand Down Expand Up @@ -642,7 +664,7 @@ func (s *server) initRepoManager() error {
s.repoLocker = repository.NewRepoLocker()
s.repoEnsurer = repository.NewEnsurer(s.mgr.GetClient(), s.logger, s.config.resourceTimeout)

s.repoManager = repository.NewManager(s.namespace, s.mgr.GetClient(), s.repoLocker, s.repoEnsurer, s.credentialFileStore, s.credentialSecretStore, s.logger)
s.repoManager = repository.NewManager(s.namespace, s.mgr.GetClient(), s.repoLocker, s.repoEnsurer, s.credentialFileStore, s.credentialSecretStore, s.config.maintenanceCfg, s.logger)

return nil
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/cmd/velero/velero.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/klog/v2"

"github.com/vmware-tanzu/velero/pkg/cmd/cli/debug"
"github.com/vmware-tanzu/velero/pkg/cmd/cli/repomantenance"

"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd/cli/backup"
Expand Down Expand Up @@ -122,6 +123,7 @@ operations can also be performed as 'velero backup get' and 'velero schedule cre
backuplocation.NewCommand(f),
snapshotlocation.NewCommand(f),
debug.NewCommand(f),
repomantenance.NewCommand(f),
)

// init and add the klog flags
Expand Down
Loading

0 comments on commit f6bb743

Please sign in to comment.