Skip to content

Commit

Permalink
Merge pull request #8022 from tstromberg/better-restart
Browse files Browse the repository at this point in the history
restart: validate configs with new hostname, add logging
  • Loading branch information
medyagh authored May 7, 2020
2 parents 1c60122 + e9eabf1 commit 269b938
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 35 deletions.
17 changes: 12 additions & 5 deletions pkg/minikube/bootstrapper/bsutil/kverify/api_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package kverify
import (
"crypto/tls"
"fmt"
"io/ioutil"
"net"
"net/http"
"os/exec"
Expand Down Expand Up @@ -172,7 +173,7 @@ func APIServerStatus(cr command.Runner, hostname string, port int) (state.State,
if strings.Contains(rr.Stderr.String(), "freezer.state: No such file or directory\n") {
glog.Infof("unable to get freezer state (might be okay and be related to #770): %s", rr.Stderr.String())
} else {
glog.Warningf("unable to get freezer state : %s", rr.Stderr.String())
glog.Warningf("unable to get freezer state: %s", rr.Stderr.String())
}

return apiServerHealthz(hostname, port)
Expand Down Expand Up @@ -202,13 +203,19 @@ func apiServerHealthz(hostname string, port int) (state.State, error) {
glog.Infof("stopped: %s: %v", url, err)
return state.Stopped, nil
}

defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
glog.Warningf("unable to read response body: %s", err)
}

glog.Infof("%s returned %d:\n%s", url, resp.StatusCode, body)
if resp.StatusCode == http.StatusUnauthorized {
glog.Errorf("%s returned code %d (unauthorized). Please ensure that your apiserver authorization settings make sense!", url, resp.StatusCode)
return state.Error, nil
return state.Error, fmt.Errorf("%s returned code %d (unauthorized). Check your apiserver authorization settings:\n%s", url, resp.StatusCode, body)
}
if resp.StatusCode != http.StatusOK {
glog.Warningf("%s response: %v %+v", url, err, resp)
return state.Error, nil
return state.Error, fmt.Errorf("%s returned error %d:\n%s", url, resp.StatusCode, body)
}
return state.Running, nil
}
67 changes: 43 additions & 24 deletions pkg/minikube/bootstrapper/kubeadm/kubeadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,23 +133,37 @@ func (k *Bootstrapper) createCompatSymlinks() error {

// clearStaleConfigs clears configurations which may have stale IP addresses
func (k *Bootstrapper) clearStaleConfigs(cfg config.ClusterConfig) error {
cp, err := config.PrimaryControlPlane(&cfg)
if err != nil {
return err
}

// These are the files that kubeadm will reject stale versions of
paths := []string{
"/etc/kubernetes/admin.conf",
"/etc/kubernetes/kubelet.conf",
"/etc/kubernetes/controller-manager.conf",
"/etc/kubernetes/scheduler.conf",
}

endpoint := fmt.Sprintf("https://%s", net.JoinHostPort(cp.IP, strconv.Itoa(cp.Port)))
args := append([]string{"ls", "-la"}, paths...)
rr, err := k.c.RunCmd(exec.Command("sudo", args...))
if err != nil {
glog.Infof("config check failed, skipping stale config cleanup: %v", err)
return nil
}
glog.Infof("found existing configuration files:\n%s\n", rr.Stdout.String())

cp, err := config.PrimaryControlPlane(&cfg)
if err != nil {
return err
}

endpoint := fmt.Sprintf("https://%s", net.JoinHostPort(constants.ControlPlaneAlias, strconv.Itoa(cp.Port)))
for _, path := range paths {
_, err := k.c.RunCmd(exec.Command("sudo", "/bin/bash", "-c", fmt.Sprintf("grep %s %s || sudo rm -f %s", endpoint, path, path)))
_, err := k.c.RunCmd(exec.Command("sudo", "grep", endpoint, path))
if err != nil {
return err
glog.Infof("%q may not be in %s - will remove: %v", endpoint, path, err)

_, err := k.c.RunCmd(exec.Command("sudo", "rm", "-f", path))
if err != nil {
glog.Errorf("rm failed: %v", err)
}
}
}
return nil
Expand Down Expand Up @@ -292,6 +306,7 @@ func (k *Bootstrapper) StartCluster(cfg config.ClusterConfig) error {
if rerr == nil {
return nil
}

out.ErrT(out.Embarrassed, "Unable to restart cluster, will reset it: {{.error}}", out.V{"error": rerr})
if err := k.DeleteCluster(cfg.KubernetesConfig); err != nil {
glog.Warningf("delete failed: %v", err)
Expand Down Expand Up @@ -435,35 +450,36 @@ func (k *Bootstrapper) WaitForNode(cfg config.ClusterConfig, n config.Node, time
return waitErr
}

// needsReset returns whether or not the cluster needs to be reconfigured
func (k *Bootstrapper) needsReset(conf string, hostname string, port int, client *kubernetes.Clientset, version string) bool {
// needsReconfigure returns whether or not the cluster needs to be reconfigured
func (k *Bootstrapper) needsReconfigure(conf string, hostname string, port int, client *kubernetes.Clientset, version string) bool {
if rr, err := k.c.RunCmd(exec.Command("sudo", "diff", "-u", conf, conf+".new")); err != nil {
glog.Infof("needs reset: configs differ:\n%s", rr.Output())
glog.Infof("needs reconfigure: configs differ:\n%s", rr.Output())
return true
}

st, err := kverify.APIServerStatus(k.c, hostname, port)
if err != nil {
glog.Infof("needs reset: apiserver error: %v", err)
glog.Infof("needs reconfigure: apiserver error: %v", err)
return true
}

if st != state.Running {
glog.Infof("needs reset: apiserver in state %s", st)
glog.Infof("needs reconfigure: apiserver in state %s", st)
return true
}

if err := kverify.ExpectAppsRunning(client, kverify.AppsRunningList); err != nil {
glog.Infof("needs reset: %v", err)
glog.Infof("needs reconfigure: %v", err)
return true
}

if err := kverify.APIServerVersionMatch(client, version); err != nil {
glog.Infof("needs reset: %v", err)
glog.Infof("needs reconfigure: %v", err)
return true
}
// to be used in the ingeration test to verify it wont reset.
glog.Infof("The running cluster does not need a reset. hostname: %s", hostname)

// DANGER: This log message is hard-coded in an integration test!
glog.Infof("The running cluster does not require reconfiguration: %s", hostname)
return false
}

Expand Down Expand Up @@ -509,7 +525,7 @@ func (k *Bootstrapper) restartCluster(cfg config.ClusterConfig) error {

// If the cluster is running, check if we have any work to do.
conf := bsutil.KubeadmYamlPath
if !k.needsReset(conf, hostname, port, client, cfg.KubernetesConfig.KubernetesVersion) {
if !k.needsReconfigure(conf, hostname, port, client, cfg.KubernetesConfig.KubernetesVersion) {
glog.Infof("Taking a shortcut, as the cluster seems to be properly configured")
return nil
}
Expand All @@ -530,12 +546,15 @@ func (k *Bootstrapper) restartCluster(cfg config.ClusterConfig) error {
fmt.Sprintf("%s phase etcd local --config %s", baseCmd, conf),
}

glog.Infof("resetting cluster from %s", conf)
glog.Infof("reconfiguring cluster from %s", conf)
// Run commands one at a time so that it is easier to root cause failures.
for _, c := range cmds {
_, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", c))
if err != nil {
return errors.Wrap(err, "run")
if _, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", c)); err != nil {
glog.Errorf("%s failed - will try once more: %v", c, err)

if _, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", c)); err != nil {
return errors.Wrap(err, "run")
}
}
}

Expand Down Expand Up @@ -820,7 +839,7 @@ func (k *Bootstrapper) applyKICOverlay(cfg config.ClusterConfig) error {
return err
}

ko := path.Join(vmpath.GuestEphemeralDir, fmt.Sprintf("kic_overlay.yaml"))
ko := path.Join(vmpath.GuestEphemeralDir, "kic_overlay.yaml")
f := assets.NewMemoryAssetTarget(b.Bytes(), ko, "0644")

if err := k.c.Copy(f); err != nil {
Expand Down Expand Up @@ -890,7 +909,7 @@ func (k *Bootstrapper) elevateKubeSystemPrivileges(cfg config.ClusterConfig) err
rr, err := k.c.RunCmd(cmd)
if err != nil {
// Error from server (AlreadyExists): clusterrolebindings.rbac.authorization.k8s.io "minikube-rbac" already exists
if strings.Contains(rr.Output(), fmt.Sprintf("Error from server (AlreadyExists)")) {
if strings.Contains(rr.Output(), "Error from server (AlreadyExists)") {
glog.Infof("rbac %q already exists not need to re-create.", rbacName)
return nil
}
Expand Down
12 changes: 6 additions & 6 deletions test/integration/pause_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func TestPause(t *testing.T) {
validator validateFunc
}{
{"Start", validateFreshStart},
{"SecondStartNoReset", validateStartNoReset},
{"SecondStartNoReconfiguration", validateStartNoReconfigure},
{"Pause", validatePause},
{"Unpause", validateUnpause},
{"PauseAgain", validatePause},
Expand All @@ -65,22 +65,22 @@ func validateFreshStart(ctx context.Context, t *testing.T, profile string) {
}
}

// validateStartNoReset validates that starting a running cluster won't invoke a reset
func validateStartNoReset(ctx context.Context, t *testing.T, profile string) {
// validateStartNoReconfigure validates that starting a running cluster does not invoke reconfiguration
func validateStartNoReconfigure(ctx context.Context, t *testing.T, profile string) {
args := []string{"start", "-p", profile, "--alsologtostderr", "-v=5"}
rr, err := Run(t, exec.CommandContext(ctx, Target(), args...))
if err != nil {
defer clusterLogs(t, profile)
t.Fatalf("failed to second start a running minikube with args: %q : %v", rr.Command(), err)
}

if !NoneDriver() {
softLog := "The running cluster does not need a reset"
softLog := "The running cluster does not require reconfiguration"
if !strings.Contains(rr.Output(), softLog) {
defer clusterLogs(t, profile)
t.Errorf("expected the second start log outputs to include %q but got: %s", softLog, rr.Output())
t.Errorf("expected the second start log output to include %q but got: %s", softLog, rr.Output())
}
}

}

func validatePause(ctx context.Context, t *testing.T, profile string) {
Expand Down

0 comments on commit 269b938

Please sign in to comment.