From 466e6cccb3095c8908736d2b33a28ddc6aee91be Mon Sep 17 00:00:00 2001 From: Myungjin Lee Date: Tue, 28 Feb 2023 08:57:02 -0800 Subject: [PATCH] refactor+fix: configurable deployer / lib regularizer fix (#351) deployer's job template file is hard-coded, which makes it hard to use a different template file at deployment time. Using a different template file is useful when the underlying infrastructure is different (e.g., k8s vs knative). To support that, the template folder and file are fed as config variables. Also, deployer's config info is fed as command arguments, which is cumbersome. So, the config parsing part is refactored such that the info is fed as a configuration file. During testing of the deployer change, a bug in the library was identified. The fix for it is added here too. Finally, the local dns configuration in flame.sh is updated so that it can be done correctly across different linux distributions (e.g., archlinux and ubuntu). The tests for flame.sh were run under archlinux and ubuntu. 
--- cmd/deployer/app/resource_handler.go | 53 +++-- cmd/deployer/cmd/root.go | 186 ++++++------------ cmd/deployer/config/config.go | 62 ++++++ fiab/flame.sh | 58 +++--- .../control/templates/deployer-configmap.yaml | 36 ++++ .../deployer-default-deployment.yaml | 30 ++- fiab/helm-chart/control/values.yaml | 8 +- .../deployer-compute1-deployment.yaml | 28 +-- .../templates/deployer-configmap.yaml | 36 ++++ fiab/helm-chart/deployer/values.yaml | 8 +- .../flame/optimizer/regularizer/default.py | 2 +- .../flame/optimizer/regularizer/fedprox.py | 11 +- 12 files changed, 303 insertions(+), 215 deletions(-) create mode 100644 cmd/deployer/config/config.go create mode 100644 fiab/helm-chart/control/templates/deployer-configmap.yaml create mode 100644 fiab/helm-chart/deployer/templates/deployer-configmap.yaml diff --git a/cmd/deployer/app/resource_handler.go b/cmd/deployer/app/resource_handler.go index 7052d4617..82305d56b 100644 --- a/cmd/deployer/app/resource_handler.go +++ b/cmd/deployer/app/resource_handler.go @@ -34,6 +34,7 @@ import ( "google.golang.org/grpc/credentials/insecure" "github.com/cisco-open/flame/cmd/deployer/app/deployer" + "github.com/cisco-open/flame/cmd/deployer/config" "github.com/cisco-open/flame/pkg/openapi" pbNotify "github.com/cisco-open/flame/pkg/proto/notification" "github.com/cisco-open/flame/pkg/restapi" @@ -41,13 +42,9 @@ import ( ) const ( - deploymentDirPath = "/" + util.ProjectName + "/deployment" deploymentTemplateDir = "templates" - jobTemplateDirPath = "/" + util.ProjectName + "/template" - jobDeploymentFilePrefix = "job-agent" - jobTemplatePath = jobTemplateDirPath + "/" + jobDeploymentFilePrefix + ".yaml.mustache" - k8sShortLabelLength = 12 + k8sShortLabelLength = 12 ) var ( @@ -63,13 +60,17 @@ type resourceHandler struct { namespace string dplyr deployer.Deployer + // variables for job templates + jobTemplateDirPath string + jobTemplatePath string + deploymentDirPath string + stream pbNotify.DeployEventRoute_GetDeployEventClient 
grpcDialOpt grpc.DialOption } -func NewResourceHandler(apiserverEp string, notifierEp string, computeSpec openapi.ComputeSpec, - platform string, namespace string, bInsecure bool, bPlain bool) *resourceHandler { +func NewResourceHandler(cfg *config.Config, computeSpec openapi.ComputeSpec, bInsecure bool, bPlain bool) *resourceHandler { var grpcDialOpt grpc.DialOption if bPlain { @@ -85,21 +86,28 @@ func NewResourceHandler(apiserverEp string, notifierEp string, computeSpec opena grpcDialOpt = grpc.WithTransportCredentials(credentials.NewTLS(tlsCfg)) } - dplyr, err := deployer.NewDeployer(platform) + dplyr, err := deployer.NewDeployer(cfg.Platform) if err != nil { zap.S().Errorf("failed to obtain a job deployer: %v", err) return nil } + parentDir := filepath.Dir(cfg.JobTemplate.Folder) + deploymentDirPath := filepath.Join(parentDir, "deployment") + rHandler := &resourceHandler{ - apiserverEp: apiserverEp, - notifierEp: notifierEp, + apiserverEp: cfg.Apiserver, + notifierEp: cfg.Notifier, spec: computeSpec, - platform: platform, - namespace: namespace, + platform: cfg.Platform, + namespace: cfg.Namespace, dplyr: dplyr, + jobTemplateDirPath: cfg.JobTemplate.Folder, + jobTemplatePath: filepath.Join(cfg.JobTemplate.Folder, cfg.JobTemplate.File), + deploymentDirPath: deploymentDirPath, + grpcDialOpt: grpcDialOpt, } @@ -249,7 +257,7 @@ func (r *resourceHandler) revokeResource(jobId string) (err error) { } taskStatuses[taskId] = openapi.AGENT_REVOKE_SUCCESS // 2.delete all the task resource specification files - deploymentChartPath := filepath.Join(deploymentDirPath, jobId, taskId) + deploymentChartPath := filepath.Join(r.deploymentDirPath, jobId, taskId) removeErr := os.RemoveAll(deploymentChartPath) if removeErr != nil { zap.S().Errorf("Errors occurred deleting specification files: %v", removeErr) @@ -323,11 +331,14 @@ func (r *resourceHandler) deployResources(deploymentConfig openapi.DeploymentCon errMsg := fmt.Sprintf("failed to initialize a job deployer: %v", err) 
return fmt.Errorf(errMsg) } + agentStatuses := map[string]openapi.AgentState{} defer r.postDeploymentStatus(deploymentConfig.JobId, agentStatuses) + for taskId := range deploymentConfig.AgentKVs { - deploymentChartPath := filepath.Join(deploymentDirPath, deploymentConfig.JobId, taskId) + deploymentChartPath := filepath.Join(r.deploymentDirPath, deploymentConfig.JobId, taskId) targetTemplateDirPath := filepath.Join(deploymentChartPath, deploymentTemplateDir) + if makeErr := os.MkdirAll(targetTemplateDirPath, util.FilePerm0644); makeErr != nil { errMsg := fmt.Sprintf("failed to create a deployment template folder: %v", makeErr) err = fmt.Errorf("%v; %v", err, errMsg) @@ -336,21 +347,20 @@ func (r *resourceHandler) deployResources(deploymentConfig openapi.DeploymentCon } // Copy helm chart files to destination folder - copyErr := copyHelmCharts(helmChartFiles, jobTemplateDirPath, deploymentChartPath) + copyErr := copyHelmCharts(helmChartFiles, r.jobTemplateDirPath, deploymentChartPath) if copyErr != nil { err = fmt.Errorf("%v; %v", err, copyErr) agentStatuses[taskId] = openapi.AGENT_DEPLOY_FAILED continue } - taskKey := deploymentConfig.AgentKVs[taskId] - ctx := map[string]string{ "imageLoc": deploymentConfig.ImageLoc, "taskId": taskId, - "taskKey": taskKey, + "taskKey": deploymentConfig.AgentKVs[taskId], } - rendered, renderErr := mustache.RenderFile(jobTemplatePath, &ctx) + + rendered, renderErr := mustache.RenderFile(r.jobTemplatePath, &ctx) if renderErr != nil { errMsg := fmt.Sprintf("failed to render a template for task %s: %v", taskId, renderErr) err = fmt.Errorf("%v; %v", err, errMsg) @@ -358,8 +368,9 @@ func (r *resourceHandler) deployResources(deploymentConfig openapi.DeploymentCon continue } - deploymentFileName := fmt.Sprintf("%s-%s.yaml", jobDeploymentFilePrefix, taskId) + deploymentFileName := fmt.Sprintf("task-%s.yaml", taskId) deploymentFilePath := filepath.Join(targetTemplateDirPath, deploymentFileName) + writeErr := os.WriteFile(deploymentFilePath, 
[]byte(rendered), util.FilePerm0644) if writeErr != nil { errMsg := fmt.Sprintf("failed to write a job rosource spec %s: %v", taskId, writeErr) @@ -367,6 +378,7 @@ func (r *resourceHandler) deployResources(deploymentConfig openapi.DeploymentCon agentStatuses[taskId] = openapi.AGENT_DEPLOY_FAILED continue } + //using short id of task as label name does not support more than 35 characters installErr := r.dplyr.Install("job-"+deploymentConfig.JobId+"-"+taskId[:k8sShortLabelLength], deploymentChartPath) if installErr != nil { @@ -375,6 +387,7 @@ func (r *resourceHandler) deployResources(deploymentConfig openapi.DeploymentCon agentStatuses[taskId] = openapi.AGENT_DEPLOY_FAILED continue } + agentStatuses[taskId] = openapi.AGENT_DEPLOY_SUCCESS } diff --git a/cmd/deployer/cmd/root.go b/cmd/deployer/cmd/root.go index 059e46761..7452fd495 100644 --- a/cmd/deployer/cmd/root.go +++ b/cmd/deployer/cmd/root.go @@ -18,149 +18,89 @@ package cmd import ( "fmt" - "strings" + "path/filepath" "github.com/spf13/cobra" + "go.uber.org/zap" "github.com/cisco-open/flame/cmd/deployer/app" + "github.com/cisco-open/flame/cmd/deployer/config" "github.com/cisco-open/flame/pkg/openapi" "github.com/cisco-open/flame/pkg/util" ) const ( - argApiserver = "apiserver" - argNotifier = "notifier" - argAdminId = "adminid" - argRegion = "region" - argComputeId = "computeid" - argApiKey = "apikey" - argPlatform = "platform" - argNamespace = "namespace" - optionInsecure = "insecure" optionPlain = "plain" ) -var rootCmd = &cobra.Command{ - Use: util.Deployer, - Short: util.ProjectName + " Deployer", - RunE: func(cmd *cobra.Command, args []string) error { - flags := cmd.Flags() - - apiserver, err := flags.GetString(argApiserver) - if err != nil { - return err - } - if len(strings.Split(apiserver, ":")) != util.NumTokensInRestEndpoint { - return fmt.Errorf("incorrect format for apiserver endpoint: %s", apiserver) - } - - notifier, err := flags.GetString(argNotifier) - if err != nil { - return err - } - if 
len(strings.Split(notifier, ":")) != util.NumTokensInEndpoint { - return fmt.Errorf("incorrect format for notifier endpoint: %s", notifier) - } - - adminId, err := flags.GetString(argAdminId) - if err != nil { - return err - } - - region, err := flags.GetString(argRegion) - if err != nil { - return err - } - - computeId, err := flags.GetString(argComputeId) - if err != nil { - return err - } - - apikey, err := flags.GetString(argApiKey) - if err != nil { - return err - } - - platform, err := flags.GetString(argPlatform) - if err != nil { - return err - } - - namespace, err := flags.GetString(argNamespace) - if err != nil { - return err - } - - bInsecure, _ := flags.GetBool(optionInsecure) - bPlain, _ := flags.GetBool(optionPlain) - - if bInsecure && bPlain { - err = fmt.Errorf("options --%s and --%s are incompatible; enable one of them", optionInsecure, optionPlain) - return err - } - - computeSpec := openapi.ComputeSpec{ - AdminId: adminId, - Region: region, - ComputeId: computeId, - ApiKey: apikey, - } - - compute, err := app.NewCompute(apiserver, computeSpec, bInsecure, bPlain) - if err != nil { - return err - } - - err = compute.RegisterNewCompute() - if err != nil { - err = fmt.Errorf("unable to register new compute with controller: %s", err) - return err - } - - resoureHandler := app.NewResourceHandler(apiserver, notifier, computeSpec, platform, namespace, bInsecure, bPlain) - resoureHandler.Start() - - select {} - }, -} +var ( + cfgFile string + cfg *config.Config + + rootCmd = &cobra.Command{ + Use: util.Deployer, + Short: util.ProjectName + " Deployer", + RunE: func(cmd *cobra.Command, args []string) error { + flags := cmd.Flags() + + bInsecure, _ := flags.GetBool(optionInsecure) + bPlain, _ := flags.GetBool(optionPlain) + + if bInsecure && bPlain { + err := fmt.Errorf("options --%s and --%s are incompatible; enable one of them", + optionInsecure, optionPlain) + return err + } + + computeSpec := openapi.ComputeSpec{ + AdminId: cfg.AdminId, + Region: 
cfg.Region, + ComputeId: cfg.ComputeId, + ApiKey: cfg.Apikey, + } + + compute, err := app.NewCompute(cfg.Apiserver, computeSpec, bInsecure, bPlain) + if err != nil { + return err + } + + err = compute.RegisterNewCompute() + if err != nil { + err = fmt.Errorf("unable to register new compute with controller: %s", err) + return err + } + + resoureHandler := app.NewResourceHandler(cfg, computeSpec, bInsecure, bPlain) + resoureHandler.Start() + + select {} + }, + } +) func init() { - defaultApiServerEp := fmt.Sprintf("http://0.0.0.0:%d", util.ApiServerRestApiPort) - rootCmd.Flags().StringP(argApiserver, "a", defaultApiServerEp, "API server endpoint") - rootCmd.MarkFlagRequired(argApiserver) - - defaultNotifierEp := fmt.Sprintf("0.0.0.0:%d", util.NotifierGrpcPort) - rootCmd.Flags().StringP(argNotifier, "n", defaultNotifierEp, "Notifier endpoint") - rootCmd.MarkFlagRequired(argNotifier) - - defaultAdminId := "admin" - rootCmd.Flags().StringP(argAdminId, "d", defaultAdminId, "unique admin id") - rootCmd.MarkFlagRequired(argAdminId) - - defaultRegion := "region" - rootCmd.Flags().StringP(argRegion, "r", defaultRegion, "region name") - rootCmd.MarkFlagRequired(argRegion) + cobra.OnInitialize(initConfig) - defaultComputeId := "compute" - rootCmd.Flags().StringP(argComputeId, "c", defaultComputeId, "unique compute id") - rootCmd.MarkFlagRequired(argComputeId) + usage := "config file (default: /etc/flame/deployer.yaml)" + rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", usage) + rootCmd.CompletionOptions.DisableDefaultCmd = true - defaultApiKey := "apiKey" - rootCmd.Flags().StringP(argApiKey, "k", defaultApiKey, "unique apikey") - rootCmd.MarkFlagRequired(argApiKey) + rootCmd.PersistentFlags().Bool(optionInsecure, false, "Allow insecure connection") + rootCmd.PersistentFlags().Bool(optionPlain, false, "Allow unencrypted connection") +} - defaultPlatform := "k8s" - rootCmd.Flags().StringP(argPlatform, "p", defaultPlatform, "compute platform") - 
rootCmd.MarkFlagRequired(argPlatform) +func initConfig() { + if cfgFile == "" { + cfgFile = filepath.Join("/etc/flame/deployer.yaml") + } - defaultNamespace := "flame" - rootCmd.Flags().StringP(argNamespace, "s", defaultNamespace, "compute namespace") - rootCmd.MarkFlagRequired(argNamespace) + var err error - rootCmd.PersistentFlags().Bool(optionInsecure, false, "Allow insecure connection") - rootCmd.PersistentFlags().Bool(optionPlain, false, "Allow unencrypted connection") + cfg, err = config.LoadConfig(cfgFile) + if err != nil { + zap.S().Fatalf("Failed to load config %s: %v", cfgFile, err) + } } func Execute() error { diff --git a/cmd/deployer/config/config.go b/cmd/deployer/config/config.go new file mode 100644 index 000000000..aef9e1aa1 --- /dev/null +++ b/cmd/deployer/config/config.go @@ -0,0 +1,62 @@ +// Copyright 2023 Cisco Systems, Inc. and its affiliates +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package config + +import ( + "github.com/spf13/afero" + "gopkg.in/yaml.v3" +) + +type Config struct { + Apiserver string `yaml:"apiserver"` + Notifier string `yaml:"notifier"` + AdminId string `yaml:"adminId"` + Region string `yaml:"region"` + ComputeId string `yaml:"computeId"` + Apikey string `yaml:"apikey"` + Platform string `yaml:"platform"` + Namespace string `yaml:"namespace"` + + JobTemplate JobTemplate `yaml:"jobTemplate"` +} + +type JobTemplate struct { + Folder string `yaml:"folder"` + File string `yaml:"file"` +} + +var fs afero.Fs + +func init() { + fs = afero.NewOsFs() +} + +func LoadConfig(configPath string) (*Config, error) { + data, err := afero.ReadFile(fs, configPath) + if err != nil { + return nil, err + } + + cfg := &Config{} + + err = yaml.Unmarshal(data, cfg) + if err != nil { + return nil, err + } + + return cfg, nil +} diff --git a/fiab/flame.sh b/fiab/flame.sh index fbd837964..7d7021cf7 100755 --- a/fiab/flame.sh +++ b/fiab/flame.sh @@ -110,22 +110,26 @@ function post_start_config { if [[ "$OSTYPE" == "linux-gnu"* ]]; then os_id=$(grep '^ID=' /etc/os-release | sed 's/"//g' | cut -d= -f2) - case $os_id in - "amzn") - echo "set flame.test domain with $minikube_ip in route 53" - ;; - *) - subnet=$(ip a show | grep br- | grep inet | awk '{print $2}') - resolver_file=/etc/systemd/network/minikube.network - echo "[Match]" | sudo tee $resolver_file > /dev/null - echo "Name=br*" | sudo tee -a $resolver_file > /dev/null - echo "[Network]" | sudo tee -a $resolver_file > /dev/null - echo "Address=$subnet" | sudo tee -a $resolver_file > /dev/null - echo "DNS=$minikube_ip" | sudo tee -a $resolver_file > /dev/null - echo "Domains=~flame.test" | sudo tee -a $resolver_file > /dev/null - sudo systemctl restart systemd-networkd - ;; - esac + case $os_id in + "amzn") + echo "set flame.test domain with $minikube_ip in route 53" + ;; + *) + IFS=. 
read -r oc1 oc2 oc3 oc4 <<< $minikube_ip + subnet=$(ip a show | grep $oc1.$oc2.$oc3 | awk '{print $2}') + device=$(ip a show | grep -B 2 $oc1.$oc2.$oc3 | head -n 1 | cut -d':' -f 2) + device=${device## } + + resolver_file=/etc/systemd/network/minikube.network + echo "[Match]" | sudo tee $resolver_file > /dev/null + echo "Name=$device" | sudo tee -a $resolver_file > /dev/null + echo "[Network]" | sudo tee -a $resolver_file > /dev/null + echo "Address=$subnet" | sudo tee -a $resolver_file > /dev/null + echo "DNS=$minikube_ip" | sudo tee -a $resolver_file > /dev/null + echo "Domains=~flame.test" | sudo tee -a $resolver_file > /dev/null + sudo systemctl restart systemd-networkd + ;; + esac elif [[ "$OSTYPE" == "darwin"* ]]; then resolver_file=/etc/resolver/flame-test echo "domain flame.test" | sudo tee $resolver_file > /dev/null @@ -184,17 +188,17 @@ function post_stop_cleanup { minikube_ip=$(minikube ip) if [[ "$OSTYPE" == "linux-gnu"* ]]; then - os_id=$(grep '^ID=' /etc/os-release | sed 's/"//g' | cut -d= -f2) - case $os_id in - "amzn") - echo "remove flame.test domain from route 53" - ;; - *) - resolver_file=/etc/systemd/network/minikube.network - sudo rm -f $resolver_file - sudo systemctl restart systemd-networkd - ;; - esac + os_id=$(grep '^ID=' /etc/os-release | sed 's/"//g' | cut -d= -f2) + case $os_id in + "amzn") + echo "remove flame.test domain from route 53" + ;; + *) + resolver_file=/etc/systemd/network/minikube.network + sudo rm -f $resolver_file + sudo systemctl restart systemd-networkd + ;; + esac elif [[ "$OSTYPE" == "darwin"* ]]; then resolver_file=/etc/resolver/flame-test sudo rm -f $resolver_file diff --git a/fiab/helm-chart/control/templates/deployer-configmap.yaml b/fiab/helm-chart/control/templates/deployer-configmap.yaml new file mode 100644 index 000000000..b6fd8eca6 --- /dev/null +++ b/fiab/helm-chart/control/templates/deployer-configmap.yaml @@ -0,0 +1,36 @@ +# Copyright 2023 Cisco Systems, Inc. 
and its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-deployer-configmap + namespace: {{ .Release.Namespace }} +data: + deployer.yaml: | + --- + apiserver: "https://{{ .Values.frontDoorUrl.apiserver }}:443" + notifier: "{{ .Values.frontDoorUrl.notifier }}:443" + adminId: {{ .Values.deployer.adminId }} + region: {{ .Values.deployer.region }} + computeId: {{ .Values.deployer.computeId }} + apikey: {{ .Values.deployer.apiKey }} + platform: {{ .Values.deployer.platform }} + namespace: {{ .Values.deployer.namespace }} + jobTemplate: + folder: {{ .Values.deployer.jobTemplate.folder }} + file: {{ .Values.deployer.jobTemplate.file }} diff --git a/fiab/helm-chart/control/templates/deployer-default-deployment.yaml b/fiab/helm-chart/control/templates/deployer-default-deployment.yaml index d5058e2f4..2b4359640 100644 --- a/fiab/helm-chart/control/templates/deployer-default-deployment.yaml +++ b/fiab/helm-chart/control/templates/deployer-default-deployment.yaml @@ -32,22 +32,6 @@ spec: spec: containers: - args: - - --apiserver - - "https://{{ .Values.frontDoorUrl.apiserver }}:443" - - --notifier - - "{{ .Values.frontDoorUrl.notifier }}:443" - - --adminid - - {{ .Values.deployerDefault.adminId }} - - --region - - {{ .Values.deployerDefault.region }} - - --computeid - - {{ .Values.deployerDefault.computeId }} - - --apikey - - {{ 
.Values.deployerDefault.apiKey }} - - --platform - - {{ .Values.deployerDefault.platform }} - - --namespace - - {{ .Values.deployerDefault.namespace }} {{ if .Values.insecure }} - "--insecure" {{ end }} @@ -56,11 +40,19 @@ spec: imagePullPolicy: IfNotPresent name: {{ .Release.Name }}-deployer-default volumeMounts: - - mountPath: /flame/template + - mountPath: /etc/flame/deployer.yaml + name: config-volume + subPath: deployer.yaml + + - mountPath: {{ .Values.deployer.jobTemplate.folder }} name: job-template-volume - + serviceAccountName: deployer volumes: + - name: config-volume + configMap: + name: {{ .Release.Name }}-deployer-configmap + - name: job-template-volume configMap: - name: {{ .Release.Name }}-deployer-job-configmap \ No newline at end of file + name: {{ .Release.Name }}-deployer-job-configmap diff --git a/fiab/helm-chart/control/values.yaml b/fiab/helm-chart/control/values.yaml index f918be4b5..6f2dd5338 100644 --- a/fiab/helm-chart/control/values.yaml +++ b/fiab/helm-chart/control/values.yaml @@ -106,13 +106,19 @@ mlflow: s3EndpointUrl: http://minio.flame.test servicePort: "5000" -deployerDefault: +deployer: adminId: "admin-1" region: "default/us" computeId: "default" apiKey: "apiKey-default" platform: "k8s" namespace: "flame" + jobTemplate: + folder: /flame/template + # to use a different template file, put the file in the "job" folder + # use its file name as the value of key "file". 
+ # also, update the name in the templates/deployer-job-configmap.yaml + file: job-agent.yaml.mustache servicePort: apiserver: "10100" diff --git a/fiab/helm-chart/deployer/templates/deployer-compute1-deployment.yaml b/fiab/helm-chart/deployer/templates/deployer-compute1-deployment.yaml index 740fdb2b6..d90bdf14c 100644 --- a/fiab/helm-chart/deployer/templates/deployer-compute1-deployment.yaml +++ b/fiab/helm-chart/deployer/templates/deployer-compute1-deployment.yaml @@ -32,22 +32,6 @@ spec: spec: containers: - args: - - --apiserver - - "https://{{ .Values.frontDoorUrl.apiserver }}:443" - - --notifier - - "{{ .Values.frontDoorUrl.notifier }}:443" - - --adminid - - {{ .Values.deployerCompute1.adminId }} - - --region - - {{ .Values.deployerCompute1.region }} - - --computeid - - {{ .Values.deployerCompute1.computeId }} - - --apikey - - {{ .Values.deployerCompute1.apiKey }} - - --platform - - {{ .Values.deployerCompute1.platform }} - - --namespace - - {{ .Values.deployerCompute1.namespace }} {{ if .Values.insecure }} - "--insecure" {{ end }} @@ -56,11 +40,19 @@ spec: imagePullPolicy: IfNotPresent name: {{ .Release.Name }}-deployer-compute1 volumeMounts: - - mountPath: /flame/template + - mountPath: /etc/flame/deployer.yaml + name: config-volume + subPath: deployer.yaml + + - mountPath: {{ .Values.deployer.jobTemplate.folder }} name: job-template-volume serviceAccountName: deployer volumes: + - name: config-volume + configMap: + name: {{ .Release.Name }}-deployer-configmap + - name: job-template-volume configMap: - name: {{ .Release.Name }}-deployer-job-configmap \ No newline at end of file + name: {{ .Release.Name }}-deployer-job-configmap diff --git a/fiab/helm-chart/deployer/templates/deployer-configmap.yaml b/fiab/helm-chart/deployer/templates/deployer-configmap.yaml new file mode 100644 index 000000000..b6fd8eca6 --- /dev/null +++ b/fiab/helm-chart/deployer/templates/deployer-configmap.yaml @@ -0,0 +1,36 @@ +# Copyright 2023 Cisco Systems, Inc. 
and its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-deployer-configmap + namespace: {{ .Release.Namespace }} +data: + deployer.yaml: | + --- + apiserver: "https://{{ .Values.frontDoorUrl.apiserver }}:443" + notifier: "{{ .Values.frontDoorUrl.notifier }}:443" + adminId: {{ .Values.deployer.adminId }} + region: {{ .Values.deployer.region }} + computeId: {{ .Values.deployer.computeId }} + apikey: {{ .Values.deployer.apiKey }} + platform: {{ .Values.deployer.platform }} + namespace: {{ .Values.deployer.namespace }} + jobTemplate: + folder: {{ .Values.deployer.jobTemplate.folder }} + file: {{ .Values.deployer.jobTemplate.file }} diff --git a/fiab/helm-chart/deployer/values.yaml b/fiab/helm-chart/deployer/values.yaml index b6cb37dbd..65fc60bf9 100644 --- a/fiab/helm-chart/deployer/values.yaml +++ b/fiab/helm-chart/deployer/values.yaml @@ -32,13 +32,19 @@ mlflow: s3EndpointUrl: http://minio.flame.test # TODO: fix s3 access id and key issue servicePort: "5000" -deployerCompute1: +deployer: adminId: "admin-2" region: "default/us/west" computeId: "compute-1" apiKey: "apiKey-1" platform: "k8s" namespace: "flame" + jobTemplate: + folder: /flame/template + # to use a different template file, put the file in the "job" folder + # use its file name as the value of key "file". 
+ # also, update the name in the templates/deployer-job-configmap.yaml + file: job-agent.yaml.mustache servicePort: agent: "10103" diff --git a/lib/python/flame/optimizer/regularizer/default.py b/lib/python/flame/optimizer/regularizer/default.py index 1132e5151..662d24905 100644 --- a/lib/python/flame/optimizer/regularizer/default.py +++ b/lib/python/flame/optimizer/regularizer/default.py @@ -25,7 +25,7 @@ class Regularizer: def __init__(self): """Initialize Regularizer instance.""" pass - + def get_term(self, **kwargs): """No regularizer term for dummy regularizer.""" return 0.0 diff --git a/lib/python/flame/optimizer/regularizer/fedprox.py b/lib/python/flame/optimizer/regularizer/fedprox.py index c80582f33..ef018804f 100644 --- a/lib/python/flame/optimizer/regularizer/fedprox.py +++ b/lib/python/flame/optimizer/regularizer/fedprox.py @@ -15,7 +15,8 @@ # SPDX-License-Identifier: Apache-2.0 """FedProx Regularizer.""" import logging -from .regularizer import Regularizer + +from .default import Regularizer logger = logging.getLogger(__name__) @@ -27,13 +28,13 @@ def __init__(self, mu): """Initialize FedProxRegularizer instance.""" super().__init__() self.mu = mu - + def get_term(self, **kwargs): - """Calculate proximal term for client-side regularization""" + """Calculate proximal term for client-side regularization.""" import torch w = kwargs['w'] w_t = kwargs['w_t'] norm_sq = 0.0 for loc_param, glob_param in zip(w, w_t): - norm_sq += torch.sum(torch.pow(loc_param-glob_param, 2)) - return (self.mu/2) * norm_sq + norm_sq += torch.sum(torch.pow(loc_param - glob_param, 2)) + return (self.mu / 2) * norm_sq