Skip to content

Commit

Permalink
Add Prometheus metric for number of clusters monitored (istio-ecosyst…
Browse files Browse the repository at this point in the history
…em#208)

- Add cli flag to enable metrics collection via prometheus.
- Add endpoint to expose metrics in prometheus format when enabled and noop when disabled.
- Add gauge to capture clusters monitored metric.
  • Loading branch information
adilfulara authored and psikka1 committed Jun 14, 2022
1 parent 3432f02 commit 3cc793a
Show file tree
Hide file tree
Showing 16 changed files with 253 additions and 81 deletions.
19 changes: 17 additions & 2 deletions admiral/cmd/admiral/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
log "github.com/sirupsen/logrus"
"os"
"os/signal"
"sync"
"syscall"
"time"

Expand Down Expand Up @@ -50,14 +51,27 @@ func GetRootCmd(args []string) *cobra.Command {
}

service := server.Service{}
metricsService := server.Service{}
opts.RemoteRegistry = remoteRegistry
ret_routes := routes.NewAdmiralAPIServer(&opts)

mainRoutes := routes.NewAdmiralAPIServer(&opts)
metricRoutes := routes.NewMetricsServer()

if err != nil {
log.Error("Error setting up server:", err.Error())
}

service.Start(ctx, 8080, ret_routes, routes.Filter, remoteRegistry)
wg := new(sync.WaitGroup)
wg.Add(2)
go func() {
metricsService.Start(ctx, 6900, metricRoutes, routes.Filter, remoteRegistry)
wg.Done()
}()
go func() {
service.Start(ctx, 8080, mainRoutes, routes.Filter, remoteRegistry)
wg.Done()
}()
wg.Wait()

log.WithFields(log.Fields{
"error": err.Error(),
Expand Down Expand Up @@ -116,6 +130,7 @@ func GetRootCmd(args []string) *cobra.Command {
"The order would be to use annotation specified as `env_key`, followed by label specified as `env_key` and then fallback to the label `env`")
rootCmd.PersistentFlags().StringVar(&params.LabelSet.GatewayApp, "gateway_app", "istio-ingressgateway",
"The the value of the `app` label to use to match and find the service that represents the ingress for cross cluster traffic (AUTO_PASSTHROUGH mode)")
rootCmd.PersistentFlags().BoolVar(&params.MetricsEnabled, "metrics", true, "Enable prometheus metrics collections")

return rootCmd
}
Expand Down
124 changes: 67 additions & 57 deletions admiral/pkg/apis/admiral/routes/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
"testing"
)

func TestReturnSuccessGET (t *testing.T) {
func TestReturnSuccessGET(t *testing.T) {
url := "https://admiral.com/health"
opts := RouteOpts{}
r := httptest.NewRequest("GET", url, strings.NewReader(""))
Expand All @@ -28,7 +28,18 @@ func TestReturnSuccessGET (t *testing.T) {
assert.Equal(t, 200, resp.StatusCode)
}

func TestGetClusters (t *testing.T) {
func TestReturnSuccessMetrics(t *testing.T) {
url := "https://admiral.com/metrics"
r := httptest.NewRequest("GET", url, strings.NewReader(""))
w := httptest.NewRecorder()

Noop(w, r)
resp := w.Result()

assert.Equal(t, 200, resp.StatusCode)
}

func TestGetClusters(t *testing.T) {
url := "https://admiral.com/clusters"
opts := RouteOpts{
RemoteRegistry: &clusters.RemoteRegistry{
Expand All @@ -40,30 +51,30 @@ func TestGetClusters (t *testing.T) {
},
}
testCases := []struct {
name string
name string
remoteCluster map[string]*secret.RemoteCluster
expectedErr interface{}
statusCode int
expectedErr interface{}
statusCode int
}{
{
name: "success with two clusters case",
remoteCluster: map[string]*secret.RemoteCluster{
name: "success with two clusters case",
remoteCluster: map[string]*secret.RemoteCluster{
"cluster1": {},
},
expectedErr: []string{"cluster1"},
statusCode: 200,
expectedErr: []string{"cluster1"},
statusCode: 200,
},
{
name: "success with no cluster case",
remoteCluster: map[string]*secret.RemoteCluster{},
expectedErr: "No cluster is monitored by admiral",
statusCode: 200,
name: "success with no cluster case",
remoteCluster: map[string]*secret.RemoteCluster{},
expectedErr: "No cluster is monitored by admiral",
statusCode: 200,
},
}
//Run the test for every provided case
for _, c := range testCases {
t.Run(c.name, func(t *testing.T) {
r:= httptest.NewRequest("GET", url, strings.NewReader(""))
r := httptest.NewRequest("GET", url, strings.NewReader(""))
w := httptest.NewRecorder()
opts.RemoteRegistry.SecretController.Cs.RemoteClusters = c.remoteCluster
opts.GetClusters(w, r)
Expand All @@ -72,7 +83,7 @@ func TestGetClusters (t *testing.T) {
expectedOutput, _ := json.Marshal(c.expectedErr)
if bytes.Compare(body, expectedOutput) != 0 {
t.Errorf("Error mismatch. Got %v, want %v", string(body), c.expectedErr)
t.Errorf("%d",bytes.Compare(body, expectedOutput))
t.Errorf("%d", bytes.Compare(body, expectedOutput))
}
if c.statusCode != 200 && resp.StatusCode != c.statusCode {
t.Errorf("Status code mismatch. Got %v, want %v", resp.StatusCode, c.statusCode)
Expand All @@ -81,64 +92,64 @@ func TestGetClusters (t *testing.T) {
}
}

func TestGetServiceEntriesByCluster (t *testing.T) {
func TestGetServiceEntriesByCluster(t *testing.T) {
url := "https://admiral.com/cluster/cluster1/serviceentries"
opts := RouteOpts{
RemoteRegistry: &clusters.RemoteRegistry{},
}
fakeIstioClient := istiofake.NewSimpleClientset()
testCases := []struct {
name string
clusterName string
name string
clusterName string
remoteControllers map[string]*clusters.RemoteController
expectedErr string
statusCode int
expectedErr string
statusCode int
}{
{
name: "failure with admiral not monitored cluster",
clusterName: "bar",
name: "failure with admiral not monitored cluster",
clusterName: "bar",
remoteControllers: nil,
expectedErr: "Admiral is not monitoring cluster bar\n",
expectedErr: "Admiral is not monitoring cluster bar\n",
statusCode: 404,
},
{
name: "failure with cluster not provided request",
clusterName: "",
name: "failure with cluster not provided request",
clusterName: "",
remoteControllers: nil,
expectedErr: "Cluster name not provided as part of the request\n",
expectedErr: "Cluster name not provided as part of the request\n",
statusCode: 400,
},
{
name: "success with no service entry for cluster",
clusterName: "cluster1",
remoteControllers:map[string]*clusters.RemoteController{
name: "success with no service entry for cluster",
clusterName: "cluster1",
remoteControllers: map[string]*clusters.RemoteController{
"cluster1": &clusters.RemoteController{
ServiceEntryController: &istio.ServiceEntryController{
ServiceEntryController: &istio.ServiceEntryController{
IstioClient: fakeIstioClient,
},
},
},
expectedErr: "No service entries configured for cluster - cluster1",
statusCode: 200,
expectedErr: "No service entries configured for cluster - cluster1",
statusCode: 200,
},
{
name: "success with service entry for cluster",
clusterName: "cluster1",
remoteControllers: map[string]*clusters.RemoteController{
name: "success with service entry for cluster",
clusterName: "cluster1",
remoteControllers: map[string]*clusters.RemoteController{
"cluster1": &clusters.RemoteController{
ServiceEntryController: &istio.ServiceEntryController{
ServiceEntryController: &istio.ServiceEntryController{
IstioClient: fakeIstioClient,
},
},
},
expectedErr: "",
statusCode: 200,
expectedErr: "",
statusCode: 200,
},
}
//Run the test for every provided case
for _, c := range testCases {
t.Run(c.name, func(t *testing.T) {
r:= httptest.NewRequest("GET", url, nil)
r := httptest.NewRequest("GET", url, nil)
r = mux.SetURLVars(r, map[string]string{"clustername": c.clusterName})
w := httptest.NewRecorder()
opts.RemoteRegistry.RemoteControllers = c.remoteControllers
Expand All @@ -158,41 +169,41 @@ func TestGetServiceEntriesByCluster (t *testing.T) {
}
}

func TestGetServiceEntriesByIdentity (t *testing.T) {
func TestGetServiceEntriesByIdentity(t *testing.T) {
url := "https://admiral.com/identity/service1/serviceentries"
opts := RouteOpts{
RemoteRegistry: &clusters.RemoteRegistry{
AdmiralCache: &clusters.AdmiralCache{
SeClusterCache: common.NewMapOfMaps() ,
SeClusterCache: common.NewMapOfMaps(),
},
},
}
testCases := []struct {
name string
identity string
host string
name string
identity string
host string
expectedErr string
statusCode int
statusCode int
}{
{
name: "failure with identity not provided request",
identity: "",
host: "",
expectedErr: "Identity not provided as part of the request\n",
statusCode: 400,
name: "failure with identity not provided request",
identity: "",
host: "",
expectedErr: "Identity not provided as part of the request\n",
statusCode: 400,
},
{
name: "success with service entry for service",
identity: "meshhealthcheck",
host: "anil-test-bdds-10-k8s-e2e.intuit.services.mesh.meshhealthcheck.mesh",
expectedErr: "Identity not provided as part of the request\n",
statusCode: 200,
name: "success with service entry for service",
identity: "meshhealthcheck",
host: "anil-test-bdds-10-k8s-e2e.intuit.services.mesh.meshhealthcheck.mesh",
expectedErr: "Identity not provided as part of the request\n",
statusCode: 200,
},
}
//Run the test for every provided case
for _, c := range testCases {
t.Run(c.name, func(t *testing.T) {
r:= httptest.NewRequest("GET", url, nil)
r := httptest.NewRequest("GET", url, nil)
r = mux.SetURLVars(r, map[string]string{"identity": c.identity})
w := httptest.NewRecorder()
if c.host != "" {
Expand All @@ -210,4 +221,3 @@ func TestGetServiceEntriesByIdentity (t *testing.T) {
})
}
}

29 changes: 29 additions & 0 deletions admiral/pkg/apis/admiral/routes/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ package routes
import (
"github.com/istio-ecosystem/admiral/admiral/pkg/apis/admiral/filters"
"github.com/istio-ecosystem/admiral/admiral/pkg/apis/admiral/server"
"github.com/istio-ecosystem/admiral/admiral/pkg/controller/common"
"github.com/prometheus/client_golang/prometheus/promhttp"
"k8s.io/client-go/tools/clientcmd"
"log"
"net/http"
)

var Filter = server.Filters{
Expand Down Expand Up @@ -47,3 +50,29 @@ func NewAdmiralAPIServer(opts *RouteOpts) server.Routes {
},
}
}

func NewMetricsServer() server.Routes {

if common.GetMetricsEnabled() {
return server.Routes{
server.Route{
Name: "Get metrics in prometheus format",
Method: "GET",
Pattern: "/metrics",
HandlerFunc: promhttp.Handler().ServeHTTP,
},
}
}
return server.Routes{
server.Route{
Name: "Noop metrics",
Method: "GET",
Pattern: "/metrics",
HandlerFunc: Noop,
},
}
}

func Noop(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}
2 changes: 1 addition & 1 deletion admiral/pkg/apis/admiral/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func (s *Service) Start(ctx context.Context, port int, routes Routes, filter []F

s.server = http.Server{Addr: ":" + strconv.Itoa(port), Handler: router}

log.Printf("Starting admiral api server on port=%d", port)
log.Printf("Starting server on port=%d", port)
log.Fatalln(s.server.ListenAndServe())

}
Expand Down
26 changes: 13 additions & 13 deletions admiral/pkg/clusters/registry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -449,24 +449,24 @@ func TestUpdateCacheController(t *testing.T) {

//Struct of test case info. Name is required.
testCases := []struct {
name string
oldConfig *rest.Config
newConfig *rest.Config
clusterId string
name string
oldConfig *rest.Config
newConfig *rest.Config
clusterId string
shouldRefresh bool
}{
{
name: "Should update controller when kubeconfig changes",
oldConfig: originalConfig,
newConfig: changedConfig,
clusterId: "test.cluster",
name: "Should update controller when kubeconfig changes",
oldConfig: originalConfig,
newConfig: changedConfig,
clusterId: "test.cluster",
shouldRefresh: true,
},
{
name: "Should not update controller when kubeconfig doesn't change",
oldConfig: originalConfig,
newConfig: originalConfig,
clusterId: "test.cluster",
name: "Should not update controller when kubeconfig doesn't change",
oldConfig: originalConfig,
newConfig: originalConfig,
clusterId: "test.cluster",
shouldRefresh: false,
},
}
Expand All @@ -476,7 +476,7 @@ func TestUpdateCacheController(t *testing.T) {
t.Run(c.name, func(t *testing.T) {
hook := logTest.NewGlobal()
rr.RemoteControllers[c.clusterId].ApiServer = c.oldConfig.Host
d, err := admiral.NewDeploymentController(make(chan struct{}), &test.MockDeploymentHandler{}, c.oldConfig, time.Second*time.Duration(300))
d, err := admiral.NewDeploymentController(make(chan struct{}), &test.MockDeploymentHandler{}, c.oldConfig, time.Second*time.Duration(300))
if err != nil {
t.Fatalf("Unexpected error creating controller %v", err)
}
Expand Down
4 changes: 2 additions & 2 deletions admiral/pkg/clusters/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
type RemoteController struct {
ClusterID string
ApiServer string
StartTime time.Time
StartTime time.Time
GlobalTraffic *admiral.GlobalTrafficController
DeploymentController *admiral.DeploymentController
ServiceController *admiral.ServiceController
Expand Down Expand Up @@ -432,7 +432,7 @@ func (rh *RolloutHandler) Deleted(obj *argo.Rollout) {
// helper function to handle add and delete for RolloutHandler
func HandleEventForRollout(event admiral.EventType, obj *argo.Rollout, remoteRegistry *RemoteRegistry, clusterName string) {

log.Infof(LogFormat, event, "rollout", obj.Name, clusterName, "Received")
log.Infof(LogFormat, event, "rollout", obj.Name, clusterName, "Received")
globalIdentifier := common.GetRolloutGlobalIdentifier(obj)

if len(globalIdentifier) == 0 {
Expand Down
Loading

0 comments on commit 3cc793a

Please sign in to comment.