Skip to content

Commit

Permalink
Merge pull request grafana#337 from xperimental/fix-duplicate-condition-5.8
Browse files Browse the repository at this point in the history

[release-5.8] Backport metrics and status handling from 5.9
  • Loading branch information
openshift-merge-bot[bot] authored Jul 30, 2024
2 parents dfe73bf + b5b1e9b commit 628b254
Show file tree
Hide file tree
Showing 15 changed files with 503 additions and 417 deletions.
3 changes: 3 additions & 0 deletions operator/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Release 5.8.11

- **xperimental**: Backport status handling in controller
- [12212](https://github.com/grafana/loki/pull/12212) **xperimental**: Do not overwrite complete status when updating schemas
- [12228](https://github.com/grafana/loki/pull/12228) **xperimental**: Restructure LokiStack metrics
- [13512](https://github.com/grafana/loki/pull/13512) **xperimental**: feat(operator): Add alert for discarded samples

## Release 5.8.10
Expand Down
40 changes: 18 additions & 22 deletions operator/controllers/loki/lokistack_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"errors"
"time"

"github.com/ViaQ/logerr/v2/kverrors"
"github.com/go-logr/logr"
"github.com/google/go-cmp/cmp"
openshiftconfigv1 "github.com/openshift/api/config/v1"
Expand Down Expand Up @@ -146,40 +145,37 @@ func (r *LokiStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, nil
}

if r.FeatureGates.BuiltInCertManagement.Enabled {
err = handlers.CreateOrRotateCertificates(ctx, r.Log, req, r.Client, r.Scheme, r.FeatureGates)
if err != nil {
return handleDegradedError(ctx, r.Client, req, err)
}
var degraded *status.DegradedError
err = r.updateResources(ctx, req)
switch {
case errors.As(err, &degraded):
// degraded errors are handled by status.Refresh below
case err != nil:
return ctrl.Result{}, err
}

err = handlers.CreateOrUpdateLokiStack(ctx, r.Log, req, r.Client, r.Scheme, r.FeatureGates)
err = status.Refresh(ctx, r.Client, req, time.Now(), degraded)
if err != nil {
return handleDegradedError(ctx, r.Client, req, err)
return ctrl.Result{}, err
}

err = status.Refresh(ctx, r.Client, req, time.Now())
if err != nil {
return ctrl.Result{}, err
if degraded != nil {
return ctrl.Result{
Requeue: degraded.Requeue,
}, nil
}

return ctrl.Result{}, nil
}

func handleDegradedError(ctx context.Context, c client.Client, req ctrl.Request, err error) (ctrl.Result, error) {
var degraded *status.DegradedError
if errors.As(err, &degraded) {
err = status.SetDegradedCondition(ctx, c, req, degraded.Message, degraded.Reason)
if err != nil {
return ctrl.Result{}, kverrors.Wrap(err, "error setting degraded condition")
func (r *LokiStackReconciler) updateResources(ctx context.Context, req ctrl.Request) error {
if r.FeatureGates.BuiltInCertManagement.Enabled {
if err := handlers.CreateOrRotateCertificates(ctx, r.Log, req, r.Client, r.Scheme, r.FeatureGates); err != nil {
return err
}

return ctrl.Result{
Requeue: degraded.Requeue,
}, nil
}

return ctrl.Result{}, err
return handlers.CreateOrUpdateLokiStack(ctx, r.Log, req, r.Client, r.Scheme, r.FeatureGates)
}

// SetupWithManager sets up the controller with the Manager.
Expand Down
7 changes: 0 additions & 7 deletions operator/internal/handlers/lokistack_create_or_update.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"github.com/grafana/loki/operator/internal/handlers/internal/storage"
"github.com/grafana/loki/operator/internal/handlers/internal/tlsprofile"
"github.com/grafana/loki/operator/internal/manifests"
"github.com/grafana/loki/operator/internal/metrics"
"github.com/grafana/loki/operator/internal/status"
)

Expand Down Expand Up @@ -208,12 +207,6 @@ func CreateOrUpdateLokiStack(
return kverrors.New("failed to configure lokistack resources", "name", req.NamespacedName)
}

// 1x.extra-small is used only for development, so the metrics will not
// be collected.
if opts.Stack.Size != lokiv1.SizeOneXExtraSmall && opts.Stack.Size != lokiv1.SizeOneXDemo {
metrics.Collect(&opts.Stack, opts.Name)
}

return nil
}

Expand Down
96 changes: 96 additions & 0 deletions operator/internal/metrics/lokistack.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package metrics

import (
"context"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
)

// metricsPrefix is prepended to the name of every metric descriptor declared
// in this file.
const metricsPrefix = "lokistack_"

var (
	// metricsCommonLabels lists the label names shared by all LokiStack
	// metric descriptors declared below.
	metricsCommonLabels = []string{"stack_namespace", "stack_name", "size"}

	// lokiStackInfoDesc describes the "lokistack_info" metric, which carries
	// only the common labels.
	lokiStackInfoDesc = prometheus.NewDesc(
		metricsPrefix+"info",
		"Information about deployed LokiStack instances. Value is always 1.",
		metricsCommonLabels,
		nil,
	)

	// lokiStackConditionsCountDesc describes the "lokistack_status_condition"
	// metric, which extends the common labels with the condition type, its
	// reason and the status the sample refers to.
	lokiStackConditionsCountDesc = prometheus.NewDesc(
		metricsPrefix+"status_condition",
		"Counts the current status conditions of the LokiStack.",
		append(metricsCommonLabels, "condition", "reason", "status"),
		nil,
	)
)

// RegisterLokiStackCollector builds a LokiStack metrics collector backed by the
// given logger and Kubernetes client and registers it with registry.
//
// It returns whatever error the registry's Register call reports.
func RegisterLokiStackCollector(log logr.Logger, k8sClient client.Client, registry prometheus.Registerer) error {
	collector := &lokiStackCollector{log: log, k8sClient: k8sClient}
	return registry.Register(collector)
}

// lokiStackCollector is the collector registered by RegisterLokiStackCollector.
// Its Describe and Collect methods expose metrics about the LokiStack resources
// listed through k8sClient.
type lokiStackCollector struct {
// log receives the error when listing LokiStacks fails during Collect.
log logr.Logger
// k8sClient is used by Collect to list the LokiStack resources.
k8sClient client.Client
}

// Describe sends the descriptors of the two LokiStack metrics (info and status
// condition) to ch, in that order.
func (l *lokiStackCollector) Describe(ch chan<- *prometheus.Desc) {
	for _, desc := range []*prometheus.Desc{lokiStackInfoDesc, lokiStackConditionsCountDesc} {
		ch <- desc
	}
}

// Collect lists all LokiStack resources through the Kubernetes client and
// sends their metrics to m.
//
// For every stack one "info" gauge with value 1 is emitted. For every status
// condition of the stack two "status_condition" gauges are emitted, one labeled
// status="true" and one labeled status="false"; the one matching the
// condition's current status has value 1, the other has value 0.
//
// If listing the stacks fails, the error is logged and no metrics are emitted.
func (l *lokiStackCollector) Collect(m chan<- prometheus.Metric) {
	stacks := &lokiv1.LokiStackList{}
	if err := l.k8sClient.List(context.TODO(), stacks); err != nil {
		l.log.Error(err, "failed to get list of LokiStacks for metrics")
		return
	}

	for i := range stacks.Items {
		stack := &stacks.Items[i]
		namespace, name, size := stack.Namespace, stack.Name, string(stack.Spec.Size)

		m <- prometheus.MustNewConstMetric(
			lokiStackInfoDesc,
			prometheus.GaugeValue, 1.0,
			namespace, name, size,
		)

		for _, cond := range stack.Status.Conditions {
			// Exactly one of the two samples below is 1, depending on
			// whether the condition currently holds.
			whenTrue, whenFalse := 0.0, 1.0
			if cond.Status == metav1.ConditionTrue {
				whenTrue, whenFalse = 1.0, 0.0
			}

			// This mirrors the behavior of kube_state_metrics, which creates two metrics for each condition,
			// one for each status (true/false).
			m <- prometheus.MustNewConstMetric(
				lokiStackConditionsCountDesc,
				prometheus.GaugeValue, whenTrue,
				namespace, name, size, cond.Type, cond.Reason, "true",
			)
			m <- prometheus.MustNewConstMetric(
				lokiStackConditionsCountDesc,
				prometheus.GaugeValue, whenFalse,
				namespace, name, size, cond.Type, cond.Reason, "false",
			)
		}
	}
}
93 changes: 93 additions & 0 deletions operator/internal/metrics/lokistack_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package metrics

import (
"context"
"io"
"strings"
"testing"

"github.com/ViaQ/logerr/v2/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
"github.com/grafana/loki/operator/internal/external/k8s/k8sfakes"
)

func TestRegisterLokiStackMetrics(t *testing.T) {
logger := log.NewLogger("test", log.WithOutput(io.Discard))
client := &k8sfakes.FakeClient{}
registry := prometheus.NewPedanticRegistry()

err := RegisterLokiStackCollector(logger, client, registry)
require.NoError(t, err)
}

// TestLokiStackMetricsCollect runs the collector against a fake Kubernetes
// client and compares the collected metrics with the expected text exposition.
//
// Each case configures what the fake client's List call returns: either an
// error (k8sError) or a list of stacks (stacks).
func TestLokiStackMetricsCollect(t *testing.T) {
	tt := []struct {
		desc        string
		k8sError    error
		stacks      *lokiv1.LokiStackList
		wantMetrics string
	}{
		{
			desc:        "no stacks",
			k8sError:    nil,
			stacks:      &lokiv1.LokiStackList{},
			wantMetrics: "",
		},
		{
			// Collect logs a failed List call and returns without emitting
			// metrics, so nothing is expected. Any non-nil error works here;
			// a sentinel from the already imported io package is used.
			desc:        "list error",
			k8sError:    io.ErrUnexpectedEOF,
			stacks:      nil,
			wantMetrics: "",
		},
		{
			desc:     "one demo",
			k8sError: nil,
			stacks: &lokiv1.LokiStackList{
				Items: []lokiv1.LokiStack{
					{
						ObjectMeta: metav1.ObjectMeta{
							Name:      "test-stack",
							Namespace: "test-namespace",
						},
						Spec: lokiv1.LokiStackSpec{
							Size: lokiv1.SizeOneXDemo,
						},
					},
				},
			},
			wantMetrics: `# HELP lokistack_info Information about deployed LokiStack instances. Value is always 1.
# TYPE lokistack_info gauge
lokistack_info{size="1x.demo",stack_name="test-stack",stack_namespace="test-namespace"} 1
`,
		},
	}

	for _, tc := range tt {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			t.Parallel()

			logger := log.NewLogger("test", log.WithOutput(io.Discard))
			k := &k8sfakes.FakeClient{}
			k.ListStub = func(_ context.Context, list client.ObjectList, _ ...client.ListOption) error {
				if tc.k8sError != nil {
					return tc.k8sError
				}

				k.SetClientObjectList(list, tc.stacks)
				return nil
			}

			expected := strings.NewReader(tc.wantMetrics)

			c := &lokiStackCollector{
				log:       logger,
				k8sClient: k,
			}

			if err := testutil.CollectAndCompare(c, expected); err != nil {
				t.Error(err)
			}
		})
	}
}
Loading

0 comments on commit 628b254

Please sign in to comment.