Skip to content

Commit

Permalink
add missing metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
Suraiya-Hameed committed Mar 19, 2024
1 parent 759b60c commit 4fb19fa
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 45 deletions.
20 changes: 19 additions & 1 deletion controllers/secretproviderclasspodstatus_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ type SecretProviderClassPodStatusReconciler struct {
writer client.Writer
eventRecorder record.EventRecorder
driverName string
reporter StatsReporter
}

// New creates a new SecretProviderClassPodStatusReconciler
Expand All @@ -73,6 +74,10 @@ func New(driverName string, mgr manager.Manager, nodeID string) (*SecretProvider
kubeClient := kubernetes.NewForConfigOrDie(mgr.GetConfig())
eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "csi-secrets-store-controller"})
sr, err := newStatsReporter()
if err != nil {
return nil, err
}

return &SecretProviderClassPodStatusReconciler{
Client: mgr.GetClient(),
Expand All @@ -83,6 +88,7 @@ func New(driverName string, mgr manager.Manager, nodeID string) (*SecretProvider
writer: mgr.GetClient(),
eventRecorder: recorder,
driverName: driverName,
reporter: sr,
}, nil
}

Expand Down Expand Up @@ -217,7 +223,7 @@ func (r *SecretProviderClassPodStatusReconciler) ListOptionsLabelSelector() clie
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
// +kubebuilder:rbac:groups="storage.k8s.io",resources=csidrivers,verbs=get;list;watch,resourceNames=secrets-store.csi.k8s.io

func (r *SecretProviderClassPodStatusReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
func (r *SecretProviderClassPodStatusReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, e error) {
r.mutex.Lock()
defer r.mutex.Unlock()

Expand Down Expand Up @@ -266,6 +272,18 @@ func (r *SecretProviderClassPodStatusReconciler) Reconcile(ctx context.Context,
return ctrl.Result{}, nil
}

begin := time.Now()
providerName := string(spc.Spec.Provider)
namespace := spcPodStatus.Namespace
secretProviderClass := spc.Name
defer func() {
// if there are SecretObjects defined in the SPC, report the metrics only if the sync was successful
if e == nil && !res.Requeue {
r.reporter.ReportSyncSecretCtMetric(ctx, providerName, namespace, secretProviderClass)
r.reporter.ReportSyncSecretDuration(ctx, time.Since(begin).Seconds())
}
}()

// determine which pod volume this is associated with
podVol := k8sutil.SPCVolume(pod, r.driverName, spc.Name)
if podVol == nil {
Expand Down
80 changes: 80 additions & 0 deletions controllers/stats_reporter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
"context"
"runtime"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/global"
)

// scope is the instrumentation scope name handed to the global meter when
// this package creates its metric instruments.
const scope = "sigs.k8s.io/secrets-store-csi-driver"

var (
	// Attribute keys attached to the metrics recorded by this package.
	osTypeKey    = "os_type"
	providerKey  = "provider"
	namespaceKey = "namespace"
	spcKey       = "secret_provider_class"

	// runtimeOS is the value recorded under osTypeKey: the operating system
	// this binary was built for.
	runtimeOS = runtime.GOOS
)

// reporter holds the metric instruments used to report Kubernetes secret
// sync activity. Both instruments are created by newStatsReporter.
type reporter struct {
// syncK8sSecretTotal is the "sync_k8s_secret" counter (total number of k8s secrets synced).
syncK8sSecretTotal metric.Int64Counter
// syncK8sSecretDuration is the "sync_k8s_secret_duration_sec" histogram (how long a sync took).
syncK8sSecretDuration metric.Float64Histogram
}

// StatsReporter is the set of metric-reporting operations the controller
// uses to record Kubernetes secret sync activity.
type StatsReporter interface {
	// ReportSyncSecretCtMetric records one secret sync for the given
	// provider, namespace and SecretProviderClass name.
	ReportSyncSecretCtMetric(ctx context.Context, provider string, namespace string, spc string)

	// ReportSyncSecretDuration records how long a secret sync took; the
	// controller passes the duration in seconds.
	ReportSyncSecretDuration(ctx context.Context, duration float64)
}

// newStatsReporter creates the secret-sync metric instruments on the global
// meter for this package's scope and returns them wrapped in a StatsReporter.
// It returns a nil reporter and the underlying error if either instrument
// cannot be created.
func newStatsReporter() (StatsReporter, error) {
	meter := global.Meter(scope)

	total, err := meter.Int64Counter(
		"sync_k8s_secret",
		metric.WithDescription("Total number of k8s secrets synced"),
	)
	if err != nil {
		return nil, err
	}

	duration, err := meter.Float64Histogram(
		"sync_k8s_secret_duration_sec",
		metric.WithDescription("Distribution of how long it took to sync k8s secret"),
	)
	if err != nil {
		return nil, err
	}

	return &reporter{
		syncK8sSecretTotal:    total,
		syncK8sSecretDuration: duration,
	}, nil
}

// ReportSyncSecretCtMetric increments the synced-secret counter by one,
// tagging the data point with the provider name, the runtime OS, the
// namespace and the SecretProviderClass name.
//
// A pointer receiver is used so the method set matches the *reporter value
// handed out by newStatsReporter and the struct is not copied on every call.
func (r *reporter) ReportSyncSecretCtMetric(ctx context.Context, provider, namespace, spc string) {
	opt := metric.WithAttributes(
		attribute.Key(providerKey).String(provider),
		attribute.Key(osTypeKey).String(runtimeOS),
		attribute.Key(namespaceKey).String(namespace),
		attribute.Key(spcKey).String(spc),
	)
	r.syncK8sSecretTotal.Add(ctx, 1, opt)
}

// ReportSyncSecretDuration records duration in the secret sync duration
// histogram, tagged only with the runtime OS. The controller supplies the
// value in seconds.
func (r *reporter) ReportSyncSecretDuration(ctx context.Context, duration float64) {
	opt := metric.WithAttributes(
		attribute.Key(osTypeKey).String(runtimeOS),
	)
	r.syncK8sSecretDuration.Record(ctx, duration, opt)
}
26 changes: 26 additions & 0 deletions docs/book/src/topics/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Prometheus is the only exporter that's currently supported with the driver.
| node_unpublish_total | Total number of successful volume unmount requests | `os_type=<runtime os>` |
| node_publish_error_total | Total number of errors with volume mount requests | `os_type=<runtime os>`<br>`provider=<provider name>`<br>`error_type=<error code>`<br>`pod_name=<pod_name>`<br>`pod_namespace=<pod_namespace>`<br>`secret_provider_class=<secret_provider_class>` |
| node_unpublish_error_total | Total number of errors with volume unmount requests | `os_type=<runtime os>` |
| sync_k8s_secret_total | Total number of k8s secrets synced | `os_type=<runtime os>`<br>`provider=<provider name>`<br>`namespace=<namespace>`<br>`secret_provider_class=<secret_provider_class>` |
| sync_k8s_secret_duration_sec | Distribution of how long it took to sync k8s secret | `os_type=<runtime os>` |
| rotation_reconcile_total | Total number of rotation reconciles | `os_type=<runtime os>`<br>`rotated=<true or false>`<br>`pod_name=<pod_name>`<br>`pod_namespace=<pod_namespace>`<br>`secret_provider_class=<secret_provider_class>` |
| rotation_reconcile_error_total | Total number of rotation reconciles with error | `os_type=<runtime os>`<br>`rotated=<true or false>`<br>`error_type=<error code>`<br>`pod_name=<pod_name>`<br>`pod_namespace=<pod_namespace>`<br>`secret_provider_class=<secret_provider_class>` |
| rotation_reconcile_duration_sec | Distribution of how long it took to rotate secrets-store content for pods | `os_type=<runtime os>`<br>`pod_name=<pod_name>`<br>`pod_namespace=<pod_namespace>`<br>`secret_provider_class=<secret_provider_class>` |
Expand All @@ -26,6 +28,30 @@ curl localhost:8095/metrics
### Sample Metrics output

```shell
# HELP sync_k8s_secret_duration_sec Distribution of how long it took to sync k8s secret
# TYPE sync_k8s_secret_duration_sec histogram
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.1"} 0
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.2"} 0
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.3"} 0
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.4"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.5"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="1"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="1.5"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="2"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="2.5"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="3"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="5"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="10"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="15"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="30"} 1
sync_k8s_secret_duration_sec_bucket{os_type="linux",le="+Inf"} 1
sync_k8s_secret_duration_sec_sum{os_type="linux"} 0.3115892
sync_k8s_secret_duration_sec_count{os_type="linux"} 1

# HELP sync_k8s_secret_total Total number of k8s secrets synced
# TYPE sync_k8s_secret_total counter
sync_k8s_secret_total{namespace="csi-test-secret-ns",os_type="linux",provider="azure",secret_provider_class="csi-test-spc"} 1

# HELP rotation_reconcile_duration_sec Distribution of how long it took to rotate secrets-store content for pods
# TYPE rotation_reconcile_duration_sec histogram
rotation_reconcile_duration_sec_bucket{os_type="linux",le="0.1"} 0
Expand Down
16 changes: 0 additions & 16 deletions pkg/secrets-store/mocks/stats_reporter_mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ type FakeReporter struct {
reportNodeUnPublishCtMetricInvoked int
reportNodePublishErrorCtMetricInvoked int
reportNodeUnPublishErrorCtMetricInvoked int
reportSyncK8SecretCtMetricInvoked int
reportSyncK8SecretDurationInvoked int
metricDetails []MetricDetails
}

Expand Down Expand Up @@ -71,14 +69,6 @@ func (f *FakeReporter) ReportNodeUnPublishErrorCtMetric(ctx context.Context) {
f.reportNodeUnPublishErrorCtMetricInvoked++
}

func (f *FakeReporter) ReportSyncK8SecretCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, count int) {
f.reportSyncK8SecretCtMetricInvoked++
}

func (f *FakeReporter) ReportSyncK8SecretDuration(ctx context.Context, duration float64) {
f.reportSyncK8SecretDurationInvoked++
}

func (f *FakeReporter) ReportNodePublishCtMetricInvoked() int {
return f.reportNodePublishCtMetricInvoked
}
Expand All @@ -91,12 +81,6 @@ func (f *FakeReporter) ReportNodePublishErrorCtMetricInvoked() int {
func (f *FakeReporter) ReportNodeUnPublishErrorCtMetricInvoked() int {
return f.reportNodeUnPublishErrorCtMetricInvoked
}
func (f *FakeReporter) ReportSyncK8SecretCtMetricInvoked() int {
return f.reportSyncK8SecretCtMetricInvoked
}
func (f *FakeReporter) ReportSyncK8SecretDurationInvoked() int {
return f.reportSyncK8SecretDurationInvoked
}

func (f *FakeReporter) GetMetricDetails() []MetricDetails {
return f.metricDetails
Expand Down
28 changes: 0 additions & 28 deletions pkg/secrets-store/stats_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,13 @@ type reporter struct {
nodeUnPublishTotal metric.Int64Counter
nodePublishErrorTotal metric.Int64Counter
nodeUnPublishErrorTotal metric.Int64Counter
syncK8sSecretTotal metric.Int64Counter
syncK8sSecretDuration metric.Float64Histogram
}

type StatsReporter interface {
ReportNodePublishCtMetric(ctx context.Context, provider, podName, podNamespace, spc string)
ReportNodeUnPublishCtMetric(ctx context.Context)
ReportNodePublishErrorCtMetric(ctx context.Context, provider, podName, podNamespace, spc, errType string)
ReportNodeUnPublishErrorCtMetric(ctx context.Context)
ReportSyncK8SecretCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, count int)
ReportSyncK8SecretDuration(ctx context.Context, duration float64)
}

func NewStatsReporter() (StatsReporter, error) {
Expand All @@ -75,12 +71,6 @@ func NewStatsReporter() (StatsReporter, error) {
if r.nodeUnPublishErrorTotal, err = meter.Int64Counter("node_unpublish_error", metric.WithDescription("Total number of node unpublish calls with error")); err != nil {
return nil, err
}
if r.syncK8sSecretTotal, err = meter.Int64Counter("sync_k8s_secret", metric.WithDescription("Total number of k8s secrets synced")); err != nil {
return nil, err
}
if r.syncK8sSecretDuration, err = meter.Float64Histogram("k8s_secret_duration_sec", metric.WithDescription("Distribution of how long it took to sync k8s secret")); err != nil {
return nil, err
}
return r, nil
}

Expand Down Expand Up @@ -120,21 +110,3 @@ func (r *reporter) ReportNodeUnPublishErrorCtMetric(ctx context.Context) {
)
r.nodeUnPublishErrorTotal.Add(ctx, 1, opt)
}

func (r *reporter) ReportSyncK8SecretCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, count int) {
opt := metric.WithAttributes(
attribute.Key(providerKey).String(provider),
attribute.Key(osTypeKey).String(runtimeOS),
attribute.Key(podNameKey).String(podName),
attribute.Key(podNamespaceKey).String(podNamespace),
attribute.Key(spcKey).String(spc),
)
r.syncK8sSecretTotal.Add(ctx, int64(count), opt)
}

func (r *reporter) ReportSyncK8SecretDuration(ctx context.Context, duration float64) {
opt := metric.WithAttributes(
attribute.Key(osTypeKey).String(runtimeOS),
)
r.syncK8sSecretDuration.Record(ctx, duration, opt)
}
1 change: 1 addition & 0 deletions test/bats/e2e-provider.bats
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ export VALIDATE_TOKENS_AUDIENCE=$(get_token_requests_audience)
assert_match "node_publish_total" "${output}"
assert_match "node_unpublish_total" "${output}"
assert_match "rotation_reconcile_total" "${output}"
assert_match "sync_k8s_secret_total" "${output}"
done
}

Expand Down

0 comments on commit 4fb19fa

Please sign in to comment.