Merge pull request #9 from ahakanbaba/master

Add a whitelistpath env var option to the kube-applier
box · Apr 28, 2017 · 57e62be · 57e62be
2 parents f2bf390 + d27fdba
commit 57e62be
Show file tree

Hide file tree

Showing 10 changed files with 208 additions and 53 deletions.
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 [![Project Status](http://opensource.box.com/badges/active.svg)](http://opensource.box.com/badges) [![Build Status](https://travis-ci.org/box/kube-applier.svg)](https://travis-ci.org/box/kube-applier)
 
-kube-applier is a service that enables continuous deployment of Kubernetes objects by applying declarative configuration files from a Git repository to a Kubernetes cluster. 
+kube-applier is a service that enables continuous deployment of Kubernetes objects by applying declarative configuration files from a Git repository to a Kubernetes cluster.
 
 kube-applier runs as a Pod in your cluster and watches the [Git repo](#mounting-the-git-repository) to ensure that the cluster objects are up-to-date with their associated spec files (JSON or YAML) in the repo.
 
@@ -39,12 +39,38 @@ We suggest running kube-applier as a Deployment (see [demo/](https://github.com/
 ### Environment Variables
 
 **Required:**
-* `REPO_PATH` - (string) Absolute path to the directory containing configuration files to be applied. It must be a Git repository or a path within one. All .json and .yaml files within this directory (and its subdirectories) will be applied, unless listed on the blacklist.
+* `REPO_PATH` - (string) Absolute path to the directory containing
+  configuration files to be applied. It must be a Git repository or a path
+  within one. All .json and .yaml files within this directory (and its
+  subdirectories) will be applied, unless listed on the blacklist or omitted
+  from a non-empty whitelist.
 * `LISTEN_PORT` - (int) Port for the container. This should be the same port specified in the container spec.
 
 **Optional:**
 * `SERVER` - (string) Address of the Kubernetes API server. By default, discovery of the API server is handled by kube-proxy. If kube-proxy is not set up, the API server address must be specified with this environment variable (which is then written into a [kubeconfig file](http://kubernetes.io/docs/user-guide/kubeconfig-file/) on the backend). Authentication to the API server is handled by service account tokens. See [Accessing the Cluster](http://kubernetes.io/docs/user-guide/accessing-the-cluster/#accessing-the-api-from-a-pod) for more info.
-* `BLACKLIST_PATH` - (string) Path to a "blacklist" file which specifies files that should not be applied. This path should be absolute (e.g. `/k8s/conf/kube_applier_blacklist`), not relative to `REPO_PATH` (although you may want to check the blacklist file into the repo). The blacklist file itself should be a plaintext file, with a file path on each line. Each of these paths should be relative to `REPO_PATH` (for example, if `REPO_PATH` is set to `/git/repo`, and the file to be blacklisted is `/git/repo/apps/app1.json`, the line in the blacklist file should be `apps/app1.json`).
+* `BLACKLIST_PATH` - (string) Path to a "blacklist" file which specifies files
+ that should not be applied. This path should be absolute (e.g.
+ `/k8s/conf/kube_applier_blacklist`), not relative to `REPO_PATH` (although
+ you may want to check the blacklist file into the repo). The blacklist file
+ itself should be a plaintext file, with a file path on each line. Each of
+ these paths should be relative to `REPO_PATH` (for example, if `REPO_PATH` is
+ set to `/git/repo`, and the file to be blacklisted is
+ `/git/repo/apps/app1.json`, the line in the blacklist file should be
+ `apps/app1.json`).
+* `WHITELIST_PATH` - (string) Path to a "whitelist" file which is used to
+ make the applier consider a specific subset of files from the repo.
+ Only the files listed in the whitelist file will be considered for apply.
+ An empty whitelist (or an unset env var) means all files in the repo are eligible to be applied.
+ If a file is listed in both the whitelist and the blacklist, the file is
+ not applied.
+ This path should be absolute (e.g.
+ `/k8s/conf/kube_applier_whitelist`), not relative to `REPO_PATH` (although
+ you may want to check the whitelist file into the repo). The whitelist file
+ itself should be a plaintext file, with a file path on each line. Each of
+ these paths should be relative to `REPO_PATH` (for example, if `REPO_PATH` is
+ set to `/git/repo`, and the file to be whitelisted is
+ `/git/repo/apps/app1.json`, the line in the whitelist file should be
+ `apps/app1.json`).
 * `POLL_INTERVAL_SECONDS` - (int) Number of seconds to wait between each check for new commits to the repo (default is 5). Set to 0 to disable the wait period.
 * <a name="run-interval"></a>`FULL_RUN_INTERVAL_SECONDS` - (int) Number of seconds between automatic full runs (default is 300, or 5 minutes). Set to 0 to disable the wait period.
 * `DIFF_URL_FORMAT` - (string) If specified, allows the status page to display a link to the source code referencing the diff for a specific commit. `DIFF_URL_FORMAT` should be a URL for a hosted remote repo that supports linking to a commit hash. Replace the commit hash portion with "%s" so it can be filled in by kube-applier (e.g. `https://github.com/kubernetes/kubernetes/commit/%s`).
@@ -76,7 +102,7 @@ Mount a Git repository from a host directory. This can be useful when you want k
 
 **What happens if the contents of the local Git repo change in the middle of a kube-applier run?**
 
-If there are changes to files in the `$REPO_PATH` directory during a kube-applier run, those changes may or may not be reflected in that run, depending on the timing of the changes. 
+If there are changes to files in the `$REPO_PATH` directory during a kube-applier run, those changes may or may not be reflected in that run, depending on the timing of the changes.
 
 Given that the `$REPO_PATH` directory is a Git repo or located within one, it is likely that the majority of changes will be associated with a Git commit. Thus, a change in the middle of a run will likely update the HEAD commit hash, which will immediately trigger another run upon completion of the current run (regardless of whether or not any of the changes were effective in the current run). However, changes that are not associated with a new Git commit will not trigger a run.
 
@@ -95,6 +121,7 @@ kube-applier hosts a status page on a webserver, served at the service endpoint
 * Start and end times
 * Latency
 * Most recent commit
+* Whitelisted files
 * Blacklisted files
 * Errors
 * Files applied successfully

diff --git a/applylist/factory.go b/applylist/factory.go
@@ -8,50 +8,69 @@ import (
 
 // FactoryInterface allows for mocking out the functionality of Factory when testing the full process of an apply run.
 type FactoryInterface interface {
-	Create() ([]string, []string, error)
+	Create() ([]string, []string, []string, error)
 }
 
 // Factory handles constructing the list of files to apply and the blacklist.
 type Factory struct {
 	RepoPath      string
 	BlacklistPath string
+	WhitelistPath string
 	FileSystem    sysutil.FileSystemInterface
 }
 
 // Create returns two alphabetically sorted lists: the list of files to apply, and the blacklist of files to skip.
-func (f *Factory) Create() ([]string, []string, error) {
+func (f *Factory) Create() ([]string, []string, []string, error) {
 	blacklist, err := f.createBlacklist()
 	if err != nil {
-		return nil, nil, err
+		return nil, nil, nil, err
 	}
-	applyList, err := f.createApplyList(blacklist)
+	whitelist, err := f.createWhitelist()
 	if err != nil {
-		return nil, nil, err
+		return nil, nil, nil, err
 	}
-	return applyList, blacklist, nil
+	applyList, err := f.createApplyList(blacklist, whitelist)
+	if err != nil {
+		return nil, nil, nil, err
+	}
+	return applyList, blacklist, whitelist, nil
 }
 
-// createBlacklist reads lines from the blacklist file, converts the relative paths to full paths, and returns a sorted list of full paths.
-func (f *Factory) createBlacklist() ([]string, error) {
-	if f.BlacklistPath == "" {
+// createFileList reads lines from the given file, converts the relative
+// paths to full paths, and returns a sorted list of full paths.
+func (f *Factory) createFileList(listFilePath string) ([]string, error) {
+	if listFilePath == "" {
 		return []string{}, nil
 	}
-	rawBlacklist, err := f.FileSystem.ReadLines(f.BlacklistPath)
+	rawList, err := f.FileSystem.ReadLines(listFilePath)
 	if err != nil {
 		return nil, err
 	}
-	blacklist := prependToEachPath(f.RepoPath, rawBlacklist)
-	sort.Strings(blacklist)
-	return blacklist, nil
+	list := prependToEachPath(f.RepoPath, rawList)
+	sort.Strings(list)
+	return list, nil
+}
+
+// createBlacklist reads lines from the blacklist file, converts the relative
+// paths to full paths, and returns a sorted list of full paths.
+func (f *Factory) createBlacklist() ([]string, error) {
+	return f.createFileList(f.BlacklistPath)
+}
+
+// createWhitelist reads lines from the whitelist file, converts the relative
+// paths to full paths, and returns a sorted list of full paths.
+func (f *Factory) createWhitelist() ([]string, error) {
+	return f.createFileList(f.WhitelistPath)
 }
 
-// createApplyList gets all files within the repo directory and returns a filtered and sorted list of full paths.
-func (f *Factory) createApplyList(blacklist []string) ([]string, error) {
+// createApplyList gets all files within the repo directory and returns a
+// filtered and sorted list of full paths.
+func (f *Factory) createApplyList(blacklist, whitelist []string) ([]string, error) {
 	rawApplyList, err := f.FileSystem.ListAllFiles(f.RepoPath)
 	if err != nil {
 		return nil, err
 	}
-	applyList := filter(rawApplyList, blacklist)
+	applyList := filter(rawApplyList, blacklist, whitelist)
 	sort.Strings(applyList)
 	return applyList, nil
 }
@@ -60,19 +79,27 @@ func (f *Factory) createApplyList(blacklist []string) ([]string, error) {
 // Conditions for skipping the file path are:
 // 1. File path is not a .json or .yaml file
 // 2. File path is listed in the blacklist
-func shouldApplyPath(path string, blacklistMap map[string]struct{}) bool {
+func shouldApplyPath(path string, blacklistMap, whitelistMap map[string]struct{}) bool {
 	_, inBlacklist := blacklistMap[path]
+
+	// If whitelist is empty, essentially there is no whitelist.
+	inWhiteList := len(whitelistMap) == 0
+	if !inWhiteList {
+		_, inWhiteList = whitelistMap[path]
+	}
 	ext := filepath.Ext(path)
-	return !inBlacklist && (ext == ".json" || ext == ".yaml")
+	return inWhiteList && !inBlacklist && (ext == ".json" || ext == ".yaml")
 }
 
-// filter iterates through the list of all files in the repo and filters it down to a list of those that should be applied.
-func filter(rawApplyList, blacklist []string) []string {
+// filter iterates through the list of all files in the repo and filters it
+// down to a list of those that should be applied.
+func filter(rawApplyList, blacklist, whitelist []string) []string {
 	blacklistMap := stringSliceToMap(blacklist)
+	whitelistMap := stringSliceToMap(whitelist)
 
 	applyList := []string{}
 	for _, filePath := range rawApplyList {
-		if shouldApplyPath(filePath, blacklistMap) {
+		if shouldApplyPath(filePath, blacklistMap, whitelistMap) {
 			applyList = append(applyList, filePath)
 		}
 	}

diff --git a/applylist/factory_test.go b/applylist/factory_test.go
@@ -11,6 +11,7 @@ import (
 type testCase struct {
 	repoPath          string
 	blacklistPath     string
+	whitelistPath     string
 	fs                sysutil.FileSystemInterface
 	expectedApplyList []string
 	expectedBlacklist []string
@@ -26,78 +27,113 @@ func TestFactoryCreate(t *testing.T) {
 	gomock.InOrder(
 		fs.EXPECT().ReadLines("/blacklist").Times(1).Return(nil, fmt.Errorf("error")),
 	)
-	tc := testCase{"/repo", "/blacklist", fs, nil, nil, fmt.Errorf("error")}
+	tc := testCase{"/repo", "/blacklist", "/whitelist", fs, nil, nil, fmt.Errorf("error")}
 	createAndAssert(t, tc)
 
 	// ListAllFiles error -> return nil lists and error, ReadLines is called
 	gomock.InOrder(
 		fs.EXPECT().ReadLines("/blacklist").Times(1).Return([]string{}, nil),
+		fs.EXPECT().ReadLines("/whitelist").Times(1).Return([]string{}, nil),
 		fs.EXPECT().ListAllFiles("/repo").Times(1).Return(nil, fmt.Errorf("error")),
 	)
-	tc = testCase{"/repo", "/blacklist", fs, nil, nil, fmt.Errorf("error")}
+	tc = testCase{"/repo", "/blacklist", "/whitelist", fs, nil, nil, fmt.Errorf("error")}
 	createAndAssert(t, tc)
 
 	// All lists and paths empty -> both lists empty, ReadLines not called
 	gomock.InOrder(
 		fs.EXPECT().ListAllFiles("").Times(1).Return([]string{}, nil),
 	)
-	tc = testCase{"", "", fs, []string{}, []string{}, nil}
+	tc = testCase{"", "", "", fs, []string{}, []string{}, nil}
 	createAndAssert(t, tc)
 
 	// Single .json file, empty blacklist -> file in applyList
 	gomock.InOrder(
 		fs.EXPECT().ReadLines("/blacklist").Times(1).Return([]string{}, nil),
+		fs.EXPECT().ReadLines("/whitelist").Times(1).Return([]string{}, nil),
 		fs.EXPECT().ListAllFiles("/repo").Times(1).Return([]string{"/repo/a.json"}, nil),
 	)
-	tc = testCase{"/repo", "/blacklist", fs, []string{"/repo/a.json"}, []string{}, nil}
+	tc = testCase{"/repo", "/blacklist", "/whitelist", fs, []string{"/repo/a.json"}, []string{}, nil}
 	createAndAssert(t, tc)
 
-	// Single .yaml file, empty blacklist -> file in applyList
+	// Single .yaml file, empty blacklist, empty whitelist -> file in applyList
 	gomock.InOrder(
 		fs.EXPECT().ReadLines("/blacklist").Times(1).Return([]string{}, nil),
+		fs.EXPECT().ReadLines("/whitelist").Times(1).Return([]string{}, nil),
 		fs.EXPECT().ListAllFiles("/repo").Times(1).Return([]string{"/repo/a.yaml"}, nil),
 	)
-	tc = testCase{"/repo", "/blacklist", fs, []string{"/repo/a.yaml"}, []string{}, nil}
+	tc = testCase{"/repo", "/blacklist", "/whitelist", fs, []string{"/repo/a.yaml"}, []string{}, nil}
 	createAndAssert(t, tc)
 
-	// Single non-.json & non-.yaml file, empty blacklist -> file not in applyList
+	// Single non-.json & non-.yaml file, empty blacklist, empty whitelist
+	// -> file not in applyList
 	gomock.InOrder(
 		fs.EXPECT().ReadLines("/blacklist").Times(1).Return([]string{}, nil),
+		fs.EXPECT().ReadLines("/whitelist").Times(1).Return([]string{}, nil),
 		fs.EXPECT().ListAllFiles("/repo").Times(1).Return([]string{"/repo/a"}, nil),
 	)
-	tc = testCase{"/repo", "/blacklist", fs, []string{}, []string{}, nil}
+	tc = testCase{"/repo", "/blacklist", "/whitelist", fs, []string{}, []string{}, nil}
 	createAndAssert(t, tc)
 
-	// Multiple files (mixed extensions), empty blacklist
+	// Multiple files (mixed extensions), empty blacklist, empty whitelist
 	gomock.InOrder(
 		fs.EXPECT().ReadLines("/blacklist").Times(1).Return([]string{}, nil),
+		fs.EXPECT().ReadLines("/whitelist").Times(1).Return([]string{}, nil),
 		fs.EXPECT().ListAllFiles("/repo").Times(1).Return([]string{"/repo/a.json", "/repo/b.jpg", "/repo/a/b.yaml", "/repo/a/b"}, nil),
 	)
-	tc = testCase{"/repo", "/blacklist", fs, []string{"/repo/a.json", "/repo/a/b.yaml"}, []string{}, nil}
+	tc = testCase{"/repo", "/blacklist", "/whitelist", fs, []string{"/repo/a.json", "/repo/a/b.yaml"}, []string{}, nil}
 	createAndAssert(t, tc)
 
-	// Multiple files (mixed extensions), blacklist
+	// Multiple files (mixed extensions), blacklist, empty whitelist
 	gomock.InOrder(
 		fs.EXPECT().ReadLines("/blacklist").Times(1).Return([]string{"b.json", "b/c.json"}, nil),
+		fs.EXPECT().ReadLines("/whitelist").Times(1).Return([]string{}, nil),
 		fs.EXPECT().ListAllFiles("/repo").Times(1).Return([]string{"/repo/a.json", "/repo/b.json", "/repo/a/b/c.yaml", "/repo/a/b", "/repo/b/c.json"}, nil),
 	)
-	tc = testCase{"/repo", "/blacklist", fs, []string{"/repo/a.json", "/repo/a/b/c.yaml"}, []string{"/repo/b.json", "/repo/b/c.json"}, nil}
+	tc = testCase{"/repo", "/blacklist", "/whitelist", fs, []string{"/repo/a.json", "/repo/a/b/c.yaml"}, []string{"/repo/b.json", "/repo/b/c.json"}, nil}
 	createAndAssert(t, tc)
 
 	// File in blacklist but not in repo
 	// (Ends up on returned blacklist anyway)
 	gomock.InOrder(
 		fs.EXPECT().ReadLines("/blacklist").Times(1).Return([]string{"a/b/c.yaml", "f.json"}, nil),
+		fs.EXPECT().ReadLines("/whitelist").Times(1).Return([]string{}, nil),
 		fs.EXPECT().ListAllFiles("/repo").Times(1).Return([]string{"/repo/a/b.json", "/repo/b/c", "/repo/a/b/c.yaml", "/repo/a/b/c", "/repo/c.json"}, nil),
 	)
-	tc = testCase{"/repo", "/blacklist", fs, []string{"/repo/a/b.json", "/repo/c.json"}, []string{"/repo/a/b/c.yaml", "/repo/f.json"}, nil}
+	tc = testCase{"/repo", "/blacklist", "/whitelist", fs, []string{"/repo/a/b.json", "/repo/c.json"}, []string{"/repo/a/b/c.yaml", "/repo/f.json"}, nil}
+	createAndAssert(t, tc)
+
+	// Empty blacklist, valid whitelist; all whitelisted files are in the repo
+	gomock.InOrder(
+		fs.EXPECT().ReadLines("/blacklist").Times(1).Return([]string{}, nil),
+		fs.EXPECT().ReadLines("/whitelist").Times(1).Return([]string{"a/b/c.yaml", "c.json"}, nil),
+		fs.EXPECT().ListAllFiles("/repo").Times(1).Return([]string{"/repo/a/b.json", "/repo/b/c", "/repo/a/b/c.yaml", "/repo/a/b/c", "/repo/c.json"}, nil),
+	)
+	tc = testCase{"/repo", "/blacklist", "/whitelist", fs, []string{"/repo/a/b/c.yaml", "/repo/c.json"}, []string{}, nil}
+	createAndAssert(t, tc)
+
+	// Empty blacklist, valid whitelist; some whitelisted files are not in the repo
+	gomock.InOrder(
+		fs.EXPECT().ReadLines("/blacklist").Times(1).Return([]string{}, nil),
+		fs.EXPECT().ReadLines("/whitelist").Times(1).Return([]string{"a/b/c.yaml", "c.json", "someRandomFile.yaml"}, nil),
+		fs.EXPECT().ListAllFiles("/repo").Times(1).Return([]string{"/repo/a/b.json", "/repo/b/c", "/repo/a/b/c.yaml", "/repo/a/b/c", "/repo/c.json"}, nil),
+	)
+	tc = testCase{"/repo", "/blacklist", "/whitelist", fs, []string{"/repo/a/b/c.yaml", "/repo/c.json"}, []string{}, nil}
+	createAndAssert(t, tc)
+
+	// Both whitelist and blacklist contain the same file
+	gomock.InOrder(
+		fs.EXPECT().ReadLines("/blacklist").Times(1).Return([]string{"a/b/c.yaml"}, nil),
+		fs.EXPECT().ReadLines("/whitelist").Times(1).Return([]string{"a/b/c.yaml", "c.json"}, nil),
+		fs.EXPECT().ListAllFiles("/repo").Times(1).Return([]string{"/repo/a/b.json", "/repo/b/c", "/repo/a/b/c.yaml", "/repo/a/b/c", "/repo/c.json"}, nil),
+	)
+	tc = testCase{"/repo", "/blacklist", "/whitelist", fs, []string{"/repo/c.json"}, []string{"/repo/a/b/c.yaml"}, nil}
 	createAndAssert(t, tc)
 }
 
 func createAndAssert(t *testing.T, tc testCase) {
 	assert := assert.New(t)
-	f := &Factory{tc.repoPath, tc.blacklistPath, tc.fs}
-	applyList, blacklist, err := f.Create()
+	f := &Factory{tc.repoPath, tc.blacklistPath, tc.whitelistPath, tc.fs}
+	applyList, blacklist, _, err := f.Create()
 	assert.Equal(tc.expectedApplyList, applyList)
 	assert.Equal(tc.expectedBlacklist, blacklist)
 	assert.Equal(tc.expectedErr, err)

diff --git a/applylist/mock_factory.go b/applylist/mock_factory.go
@@ -28,12 +28,13 @@ func (_m *MockFactoryInterface) EXPECT() *_MockFactoryInterfaceRecorder {
 	return _m.recorder
 }
 
-func (_m *MockFactoryInterface) Create() ([]string, []string, error) {
+func (_m *MockFactoryInterface) Create() ([]string, []string, []string, error) {
 	ret := _m.ctrl.Call(_m, "Create")
 	ret0, _ := ret[0].([]string)
 	ret1, _ := ret[1].([]string)
-	ret2, _ := ret[2].(error)
-	return ret0, ret1, ret2
+	ret2, _ := ret[2].([]string)
+	ret3, _ := ret[3].(error)
+	return ret0, ret1, ret2, ret3
 }
 
 func (_mr *_MockFactoryInterfaceRecorder) Create() *gomock.Call {

diff --git a/main.go b/main.go
@@ -30,6 +30,11 @@ func main() {
 	listenPort := sysutil.GetRequiredEnvInt("LISTEN_PORT")
 	server := sysutil.GetEnvStringOrDefault("SERVER", "")
 	blacklistPath := sysutil.GetEnvStringOrDefault("BLACKLIST_PATH", "")
+
+	// A file that contains a list of files to consider for application.
+	// If the env var is not defined or the file is empty, the whitelist is a
+	// no-op and all files are considered.
+	whitelistPath := sysutil.GetEnvStringOrDefault("WHITELIST_PATH", "")
 	diffURLFormat := sysutil.GetEnvStringOrDefault("DIFF_URL_FORMAT", "")
 	pollInterval := time.Duration(sysutil.GetEnvIntOrDefault("POLL_INTERVAL_SECONDS", defaultPollIntervalSeconds)) * time.Second
 	fullRunInterval := time.Duration(sysutil.GetEnvIntOrDefault("FULL_RUN_INTERVAL_SECONDS", defaultFullRunIntervalSeconds)) * time.Second
@@ -53,7 +58,7 @@ func main() {
 	batchApplier := &run.BatchApplier{kubeClient, metrics}
 	gitUtil := &git.GitUtil{repoPath}
 	fileSystem := &sysutil.FileSystem{}
-	listFactory := &applylist.Factory{repoPath, blacklistPath, fileSystem}
+	listFactory := &applylist.Factory{repoPath, blacklistPath, whitelistPath, fileSystem}
 
 	// Webserver and scheduler send run requests to runQueue channel, runner receives the requests and initiates runs.
 	// Only 1 pending request may sit in the queue at a time.