cisco-open · myungjin · Dec 5, 2022 · Dec 4, 2022
diff --git a/docs/03.A-fiab-linux.md → docs/03-a-ubuntu.md b/docs/03.A-fiab-linux.md → docs/03-a-ubuntu.md
diff --git a/docs/03.B-fiab-amzn-linux-gpu.md → docs/03-b-amzn2-gpu.md b/docs/03.B-fiab-amzn-linux-gpu.md → docs/03-b-amzn2-gpu.md
@@ -1,62 +1,84 @@
-# Fiab installation in Amazon Linux
+# Fiab installation in Amazon Linux 2
 This guideline is for configuring fiab in amazon linux 2 with GPU supported instance types (e.g., p2).
 
 ## Prerequisites
-This section is specifically for AWS EC2 instance with GPU. For general linux machines without GPU (regardless of VM or baremetal machine),
-the guideline for [linux](#linux) is recommended.
+This section is specifically for AWS EC2 instance with amazon linux 2 image and GPU.
+For other linux distributions without GPU (regardless of VM or baremetal machine), refer to [Ubuntu](03-a-ubuntu.md);
+with their respective package manager, the guideline for Ubuntu can be easily followed.
+
 For Amazon linux 2 image (amzn2), the following tools are necessary: `minikube`, `kubectl`, `helm`, `cri-dockerd`, `crictl` , `docker` and `jq`.
-The image was tested under an ec2 instance with GPU (e.g., p2 instances). The following procedures show how to enable GPU support in minikube:
+The image was tested under an ec2 instance with GPU (e.g., p2 instances).
 
-### Step 1: Install Docker CRI
-To install cri-dockerd
-1. Download cri-dockerd source code.
-``` bash
-sudo -i 
-git clone https://github.com/Mirantis/cri-dockerd.git 
+To set up fiab, run `install.sh` under the fiab folder.
+```bash
+cd fiab
+./install.sh amzn2
 ```
+**Note: If install.sh is executed, the below steps in the prerequisites section must be skipped.
+Go to the [starting minikube part](#Starting-minikube).**
 
-2. Install Golang and set up the compilation env.
-```
-wget https://storage.googleapis.com/golang/getgo/installer_linux 
-chmod +x ./installer_linux 
-./installer_linux 
-source ~/.bash_profile 
- ```
+This prerequisites part should be executed only once.
+The following shows the steps incorporated in the `install.sh` script,
+which can be manually followed to understand what the script does.
 
-3. Install and set up cri-dockerd via systemd.
- ```
-cd cri-dockerd 
-mkdir bin 
-go build -o bin/cri-dockerd 
-install -o root -g root -m 0755 bin/cri-dockerd /usr/bin/cri-dockerd 
-cp -a packaging/systemd/* /etc/systemd/system 
-systemctl daemon-reload 
-systemctl enable cri-docker.service 
-systemctl enable --now cri-docker.socket 
-exit
-```
-
-### Step 2: Install docker
+### Step 1: Install docker
 Install docker as per [this](https://docs.docker.com/engine/install/) document.
 
-### Step 3:Install crictl
-Download crictl tar file and install it.
-```
-VERSION="v1.25.0" 
-wget https://github.com/kubernetes-sigs/cri-tools/releases/download/$VERSION/crictl-$VERSION-linux-amd64.tar.gz 
-sudo tar zxvf crictl-$VERSION-linux-amd64.tar.gz -C /usr/local/bin 
-rm -f crictl-$VERSION-linux-amd64.tar.gz 
+### Step 2: Install Docker CRI
+``` bash
+# set up golang compilation env
+wget https://storage.googleapis.com/golang/getgo/installer_linux
+chmod +x ./installer_linux
+./installer_linux
+source ~/.bash_profile
+
+# download cri-docker
+git clone https://github.com/Mirantis/cri-dockerd.git
+cd cri-dockerd
+mkdir bin
+go build -o bin/cri-dockerd
+
+# install cri-docker
+sudo install -o root -g root -m 0755 bin/cri-dockerd /usr/bin/cri-dockerd
+sudo cp -a packaging/systemd/* /etc/systemd/system
+sudo systemctl daemon-reload
+sudo systemctl enable cri-docker.service
+sudo systemctl enable --now cri-docker.socket
 ```
 
-### Step 4: Installing minikube
-1. Install minukube
+### Step 3: Install crictl
+```bash
+# install crictl
+VERSION="v1.25.0"
+wget https://github.com/kubernetes-sigs/cri-tools/releases/download/$VERSION/crictl-$VERSION-linux-amd64.tar.gz
+sudo tar zxvf crictl-$VERSION-linux-amd64.tar.gz -C /usr/local/bin
+rm -f crictl-$VERSION-linux-amd64.tar.gz
+```
 
+### Step 4: Installing minikube, kubectl and helm
 ```bash
-curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-latest.x86_64.rpm 
-sudo rpm -Uvh minikube-latest.x86_64.rpm 
+# install minikube
+curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-latest.x86_64.rpm
+sudo rpm -Uvh minikube-latest.x86_64.rpm
+
+# install kubectl
+curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
+sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
+
+# install helm
+HELM_VERSION=v3.10.2
+curl -LO https://get.helm.sh/helm-$HELM_VERSION-linux-amd64.tar.gz
+tar -zxvf helm-$HELM_VERSION-linux-amd64.tar.gz
+sudo mv linux-amd64/helm /usr/local/bin/helm
 ```
 
-2. Start Minikube
+## Starting minikube
+A minikube environment is reusable until it is deleted by executing `minikube delete`.
+If the minikube env is destroyed, this step needs to be executed.
+If it is stopped by running `sudo minikube stop`, one can simply restart it by running `sudo minikube start`
+without need to follow the steps below.
+
+### Step 1: Start minikube with none driver
 ```bash
 sudo minikube start --driver=none --apiserver-ips 127.0.0.1 --apiserver-name localhost --cni=bridge
 ```
@@ -67,37 +89,43 @@ Note: If `Exiting due to HOST_JUJU_LOCK_PERMISSION` error happens, run the follo
 sudo sysctl fs.protected_regular=0
 ```
 
-### Step 5: Install kubectl
-Run the following commands to install kubectl command:
+Run the following commands to ensure that `kubectl` can be executed without `sudo`:
 ```bash
-curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" 
-sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl 
+# remove any old config
+rm -rf $HOME/.kube $HOME/.minikube
+
+# transfer config to a normal user so that kubectl commands can be executed without sudo
+sudo cp -rf /root/.kube /root/.minikube $HOME
+sudo chown -R $USER $HOME/.kube $HOME/.minikube
+
+# update the cert file's location correctly
+sed -i 's@/root@'"$HOME"'@' $HOME/.kube/config
 ```
 
-### Step 6: Install NVIDIA'S device plugin
+### Step 2: Install NVIDIA'S device plugin
 1. If NVIDIA's GPU is available in the machine, run the following command to install nvidia device plugin:
 ```bash
-sudo kubectl create -f https://github.com/raw/NVIDIA/k8s-device-plugin/master/nvidia-device-plugin.yml 
+kubectl create -f https://github.com/raw/NVIDIA/k8s-device-plugin/master/nvidia-device-plugin.yml
 ```
+
 2. To check if GPUs are enabled, run the following command:
 ```bash
-sudo kubectl get nodes -ojson | jq .items[].status.capacity 
+kubectl get nodes -ojson | jq .items[].status.capacity
 ```
 An output should look similar to:
-```
+```console
 { 
-  "cpu": "4", 
-  "ephemeral-storage": "524275692Ki", 
-  "hugepages-1Gi": "0", 
-  "hugepages-2Mi": "0", 
-  "memory": "62766704Ki", 
-  "nvidia.com/gpu": "1", 
-  "pods": "110" 
-} 
+  "cpu": "4",
+  "ephemeral-storage": "524275692Ki",
+  "hugepages-1Gi": "0",
+  "hugepages-2Mi": "0",
+  "memory": "62766704Ki",
+  "nvidia.com/gpu": "1",
+  "pods": "110"
+}
 ```
 
-## Configuring addons
-
+### Step 3: Configuring addons
 Next, `ingress` and `ingress-dns` addons need to be installed with the following command:
 ```bash
 sudo minikube addons enable ingress
@@ -107,33 +135,18 @@ sudo minikube addons enable ingress-dns
 As a final step, a cert manager is needed to enable tls. The `setup-cert-manager.sh` script installs and configures a cert manager for
 selfsigned certificate creation. Run the following command:
 ```bash
-sudo ./setup-cert-manager.sh
+./setup-cert-manager.sh
 ```
 
-
-## Building flame
-A Docker daemon comes within the minikube VM. To build flame container image, set the environment variables with the following command.
-
-```bash
-eval $(minikube docker-env)
-```
-See [here](https://minikube.sigs.k8s.io/docs/handbook/pushing/#1-pushing-directly-to-the-in-cluster-docker-daemon-docker-env) for more details.
-
-To test the config, run the following:
-```bash
-sudo docker ps
-```
-This command will show containers within the minikube.
+## (Optional) Building flame container image
+To simply use flame, skip this step and go to the [starting flame step](#Starting-flame).
+Building flame container image is only needed if a developer makes changes in the source code and wants to test the local changes.
 
 In order to build flame container image, run the following:
 ```bash
-sudo ./build-image.sh
+./build-image.sh
 ```
 
-**Note**: This setup uses docker-daemon within the minikube VM, any downloaded or locally-built images will be gone when the VM is deleted
-(i.e., `minikube delete` is executed). Unless a fresh minikube instance is needed, simply stopping the minikube instance would be useful
-to save time for development and testing.
-
 To check the flame image built, run `docker images`. An output is similar to:
 ```bash
 REPOSITORY                                TAG       IMAGE ID       CREATED          SIZE
@@ -157,30 +170,19 @@ sudo minikube tunnel
 ```
 The tunnel creates a routable IP for deployment.
 
-
 To bring up flame and its dependent applications, `helm` is used.
 A shell script (`flame.sh`) to use helm is provided.
 Run the following command:
 ```bash
-sudo ./flame.sh start
-```
-During the configuration by `flame.sh`, it asks a password for sudo permission.
-The reason for this is to add a dns configuration in `/etc/resolver/flame-test`.
-When stopping flame, the script asks again a password to delete `/etc/resolver/flame-test`.
-
-The file may look like the following:
-```
-domain flame.test
-nameserver 192.168.64.62
-search_order 1
-timeout 5
+./flame.sh start
 ```
-Here `192.168.64.62` is minikube's IP address.
+The above command ensures that the latest official flame image from docker hub is used.
+To use a locally developed image, add `--local-img` to the above command.
 
 ## Validating deployment
 To check deployment status, run the following command:
 ```bash
-sudo kubectl get pods -n flame
+kubectl get pods -n flame
 ```
 
 An example output looks like the following:
@@ -198,6 +200,9 @@ flame-notifier-cf4854cd9-g27wj      1/1     Running   0              7m5s
 postgres-7fd96c847c-6qdpv           1/1     Running   0              7m5s
 ```
 
+In Amazon EC2, the `flame.test` domain needs to be added to Route 53 with the minikube IP address,
+which can be obtained by running `minikube ip`. Without the Route 53 configuration, the following
+ping test will fail.
 As a way to test a successful configuration of routing and dns, test with the following commands:
 ```bash
 ping -c 1 apiserver.flame.test
@@ -209,7 +214,7 @@ That should return a mlflow's web page.
 
 ## Stopping flame
 ```bash
-sudo ./flame.sh stop
+./flame.sh stop
 ```
 Before starting flame again, make sure that all the pods in the flame namespace are deleted.
 To check that, use `kubectl get pods -n flame` command.
@@ -218,15 +223,15 @@ To check that, use `kubectl get pods -n flame` command.
 In kubernetes, a pod is the smallest, most basic deployable object. A pod consists of at least one container instance.
 Using the pod's name (e.g., `flame-apiserver-65d8c7fcf4-z8x5b`), one can log into the running pod as follows:
 ```bash
-sudo kubectl exec -it -n flame flame-apiserver-65d8c7fcf4-z8x5b -- bash
+kubectl exec -it -n flame flame-apiserver-65d8c7fcf4-z8x5b -- bash
 ```
 
 Logs of flame components are found at `/var/log/flame` in the instance.
 
 ## Creating flame config
 The following command creates `config.yaml` under `$HOME/.flame`.
 ```bash
-sudo ./build-config.sh
+./build-config.sh
 ```
 The flame CLI tool, `flamectl` uses the configuration file to interact with the flame system.
 In order to build, `flamectl`, run `make install` from the level folder (i.e., `flame`).
@@ -243,5 +248,5 @@ sudo minikube delete
 In order to run a sample mnist job, refer to instructions at [mnist example](04-examples.md#mnist).
 
 **Note**: By executing the above command, any downloaded or locally-built images are also deleted together when the VM is deleted.
-Unless a fresh minikube instance is needed, simply stopping the minikube (i.e., `minikube stop`) instance would be useful
+Unless a fresh minikube instance is needed, simply stopping the minikube (i.e., `sudo minikube stop`) instance would be useful
 to save time for development and testing.
diff --git a/docs/03.C-fiab-mac.md → docs/03-c-mac.md b/docs/03.C-fiab-mac.md → docs/03-c-mac.md
diff --git a/docs/03-fiab.md b/docs/03-fiab.md
@@ -16,6 +16,6 @@ The fiab env is also tested under linux distributions such as Ubuntu, Amazon Lin
 ## Fiab installation guideline
 Follow one of the links below that matches operating system under consideration:
 
-* [Linux](03.A-fiab-linux.md)
-* [Amazon Linux with GPU](03.B-fiab-amzn-linux-gpu.md)
-* [MAC OS](03.C-fiab-mac.md)
+* [Ubuntu](03-a-ubuntu.md)
+* [Amazon Linux2 with GPU](03-b-amzn2-gpu.md)
+* [MAC OS](03-c-mac.md)
diff --git a/fiab/flame.sh b/fiab/flame.sh
@@ -106,15 +106,23 @@ function post_start_config {
     minikube_ip=$(minikube ip)
 
     if [[ "$OSTYPE" == "linux-gnu"* ]]; then
-        subnet=$(ip a show | grep br- | grep inet | awk '{print $2}')
-        resolver_file=/etc/systemd/network/minikube.network
-        echo "[Match]" | sudo tee $resolver_file > /dev/null
-        echo "Name=br*" | sudo tee -a $resolver_file > /dev/null
-        echo "[Network]" | sudo tee -a $resolver_file > /dev/null
-        echo "Address=$subnet" | sudo tee -a $resolver_file > /dev/null
-        echo "DNS=$minikube_ip" | sudo tee -a $resolver_file > /dev/null
-        echo "Domains=~flame.test" | sudo tee -a $resolver_file > /dev/null
-        sudo systemctl restart systemd-networkd
+	os_id=$(grep '^ID=' /etc/os-release | sed 's/"//g' | cut -d= -f2)
+	case $os_id in
+	    "amzn")
+		echo "set flame.test domain with $minikube_ip in route 53"
+		;;
+	    *)
+		subnet=$(ip a show | grep br- | grep inet | awk '{print $2}')
+		resolver_file=/etc/systemd/network/minikube.network
+		echo "[Match]" | sudo tee $resolver_file > /dev/null
+		echo "Name=br*" | sudo tee -a $resolver_file > /dev/null
+		echo "[Network]" | sudo tee -a $resolver_file > /dev/null
+		echo "Address=$subnet" | sudo tee -a $resolver_file > /dev/null
+		echo "DNS=$minikube_ip" | sudo tee -a $resolver_file > /dev/null
+		echo "Domains=~flame.test" | sudo tee -a $resolver_file > /dev/null
+		sudo systemctl restart systemd-networkd
+		;;
+	esac
     elif [[ "$OSTYPE" == "darwin"* ]]; then
         resolver_file=/etc/resolver/flame-test
         echo "domain flame.test" | sudo tee $resolver_file > /dev/null
@@ -138,7 +146,7 @@ function post_start_config {
     echo "}" | tee -a $tmp_file > /dev/null
 
     # step 4: create patch file
-    echo "{\"data\": {\"Corefile\": $(jq -R -s < $tmp_file)}}" > $tmp_file
+    echo "{\"data\": {\"Corefile\": $(jq -R -s '.' < $tmp_file)}}" > $tmp_file
 
     # step 5: patch configmap of coredns with the updated dns entries
     kubectl patch configmap coredns \
@@ -173,9 +181,17 @@ function post_stop_cleanup {
     minikube_ip=$(minikube ip)
 
     if [[ "$OSTYPE" == "linux-gnu"* ]]; then
-        resolver_file=/etc/systemd/network/minikube.network
-        sudo rm -f $resolver_file
-        sudo systemctl restart systemd-networkd
+	os_id=$(grep '^ID=' /etc/os-release | sed 's/"//g' | cut -d= -f2)
+	case $os_id in
+	    "amzn")
+		echo "remove flame.test domain from route 53"
+		;;
+	    *)
+		resolver_file=/etc/systemd/network/minikube.network
+		sudo rm -f $resolver_file
+		sudo systemctl restart systemd-networkd
+		;;
+	esac
     elif [[ "$OSTYPE" == "darwin"* ]]; then
         resolver_file=/etc/resolver/flame-test
         sudo rm -f $resolver_file