Skip to content

Commit

Permalink
Added metrics for LRU data cache
Browse files Browse the repository at this point in the history
Signed-off-by: Ashish Naware <ashishnaware3@gmail.com>
  • Loading branch information
AshishNaware committed Sep 17, 2024
1 parent 88036cd commit 6f1f0ce
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 16 deletions.
20 changes: 20 additions & 0 deletions docs/content/en/docs/reference/metrics.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 62 additions & 0 deletions pkg/observer/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package observer

import (
"fmt"

"github.com/cilium/tetragon/pkg/api/dataapi"
lru "github.com/hashicorp/golang-lru/v2"
)

// cache wraps a fixed-size LRU keyed by data event id and keeps the data cache
// metrics (size, evictions, misses) in sync with the operations performed
// through its methods.
type cache struct {
	cache *lru.Cache[dataapi.DataEventId, []byte]
	// size is the capacity the LRU was created with; it is exported through
	// the data_cache_capacity metric.
	size int
}

// newCache constructs an LRU cache holding at most dataCacheSize entries.
//
// No eviction callback is installed on purpose: golang-lru invokes that
// callback for explicit Remove and Purge calls as well, which would make
// data_cache_evictions_total count every consumed entry. Capacity evictions
// are counted in add instead, from the value reported by the LRU itself.
//
// It returns the error from the LRU constructor unchanged (for example when
// the requested size is not valid).
func newCache(dataCacheSize int) (*cache, error) {
	lruCache, err := lru.New[dataapi.DataEventId, []byte](dataCacheSize)
	if err != nil {
		return nil, err
	}
	return &cache{
		cache: lruCache,
		size:  dataCacheSize,
	}, nil
}

// get returns the bytes cached for dataEventId. When the id is not cached it
// increments data_cache_misses_total{operation="get"} and returns an error.
func (c *cache) get(dataEventId dataapi.DataEventId) ([]byte, error) {
	data, ok := c.cache.Get(dataEventId)
	if !ok {
		dataCacheMisses.WithLabelValues("get").Inc()
		return nil, fmt.Errorf("data event with id : %v not found", dataEventId)
	}
	return data, nil
}

// add stores msgData under id, replacing any value already cached for that id,
// and reports whether an older entry was evicted to make room.
//
// The size gauge is set from the LRU's own length rather than incremented:
// re-adding an existing id (the append path in DataAdd) neither evicts nor
// grows the cache, so a blind increment would make the gauge drift upwards.
func (c *cache) add(id dataapi.DataEventId, msgData []byte) bool {
	evicted := c.cache.Add(id, msgData)
	if evicted {
		dataCacheEvictions.Inc()
	}
	dataCacheTotal.Set(float64(c.cache.Len()))
	return evicted
}

// remove drops the entry identified by desc.Id and reports whether it was
// present. A missing entry increments
// data_cache_misses_total{operation="remove"}. The size gauge is refreshed
// from the LRU's length in either case.
func (c *cache) remove(desc dataapi.DataEventDesc) bool {
	present := c.cache.Remove(desc.Id)
	if !present {
		dataCacheMisses.WithLabelValues("remove").Inc()
	}
	dataCacheTotal.Set(float64(c.cache.Len()))
	return present
}
30 changes: 14 additions & 16 deletions pkg/observer/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,37 +12,35 @@ import (
"github.com/cilium/tetragon/pkg/api/dataapi"
"github.com/cilium/tetragon/pkg/api/ops"
"github.com/cilium/tetragon/pkg/logger"
lru "github.com/hashicorp/golang-lru/v2"
)

func init() {
RegisterEventHandlerAtInit(ops.MSG_OP_DATA, HandleData)
}

var (
dataMap *lru.Cache[dataapi.DataEventId, []byte]
dataCache *cache
)

// InitDataCache creates the package-level data cache with room for size
// entries and returns the error reported by the cache constructor, if any.
func InitDataCache(size int) error {
var err error

dataMap, err = lru.New[dataapi.DataEventId, []byte](size)
dataCache, err = newCache(size)
return err
}

// DataAdd records msgData for the data event id. When nothing is cached for id
// the bytes are stored as a new entry and DataEventAdded is counted; otherwise
// the bytes are appended to what is already cached and DataEventAppended is
// counted. It always returns nil.
func DataAdd(id dataapi.DataEventId, msgData []byte) error {
size := len(msgData)
data, ok := dataMap.Get(id)
if !ok {
dataMap.Add(id, msgData)
data, err := dataCache.get(id)
if err != nil {
dataCache.add(id, msgData)
DataEventMetricInc(DataEventAdded)
} else {
// Extend the cached bytes with the new chunk and store the result back
// under the same id.
data = append(data, msgData...)
dataMap.Add(id, data)
dataCache.add(id, data)
DataEventMetricInc(DataEventAppended)
logger.GetLogger().WithFields(nil).Tracef("Data message received id %v, size %v, total %v", id, size, len(data))
}

logger.GetLogger().WithFields(nil).Tracef("Data message received id %v, size %v, total %v", id, size, len(data))
return nil
}

Expand All @@ -60,13 +58,13 @@ func add(r *bytes.Reader, m *dataapi.MsgData) error {
}

func DataGet(desc dataapi.DataEventDesc) ([]byte, error) {
data, ok := dataMap.Get(desc.Id)
if !ok {
data, err := dataCache.get(desc.Id)
if err != nil {
DataEventMetricInc(DataEventNotMatched)
return nil, fmt.Errorf("failed to find data for id: %v", desc.Id)
return nil, err
}

dataMap.Remove(desc.Id)
dataCache.remove(desc)

// make sure we did not lose anything on the way through the ring buffer
if len(data) != int(desc.Size-desc.Leftover) {
Expand All @@ -88,18 +86,18 @@ func HandleData(r *bytes.Reader) ([]Event, error) {
m := dataapi.MsgData{}
err := binary.Read(r, binary.LittleEndian, &m)
if err != nil {
return nil, fmt.Errorf("Failed to read data msg")
return nil, fmt.Errorf("failed to read data msg")
}

err = add(r, &m)
if err != nil {
return nil, fmt.Errorf("Failed to add data msg")
return nil, fmt.Errorf("failed to add data msg")
}

// we don't send the event further
return nil, nil
}

// DataPurge calls Purge on the underlying LRU of the package-level data cache.
// NOTE(review): the data_cache_size gauge is not touched here — confirm
// whether it should be reset when the cache is purged.
func DataPurge() {
dataMap.Purge()
dataCache.cache.Purge()
}
51 changes: 51 additions & 0 deletions pkg/observer/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ const (
subsystem = "observer"
)

// operationLabel is the "operation" label attached to data cache miss metrics;
// its value is restricted to the cache operations that can miss.
var operationLabel = metrics.ConstrainedLabel{
	Name:   "operation",
	Values: []string{"get", "remove"},
}

var (
// TODO: These metrics are also stored as Observer struct fields. We could
// collect them only once: https://github.com/cilium/tetragon/issues/2834
Expand Down Expand Up @@ -53,12 +60,56 @@ var (
Help: "Number of perf events Tetragon ring buffer events queue lost.",
ConstLabels: nil,
})

dataCacheTotal = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: consts.MetricsNamespace,
Name: "data_cache_size",
Help: "The size of the data cache",
ConstLabels: nil,
})
dataCacheCapacity = metrics.MustNewCustomGauge(metrics.NewOpts(
consts.MetricsNamespace, "", "data_cache_capacity",
"The capacity of the data cache.",
nil, nil, nil,
))
dataCacheEvictions = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "data_cache_evictions_total",
Help: "Number of data cache LRU evictions.",
})
dataCacheMisses = metrics.MustNewCounter(metrics.NewOpts(
consts.MetricsNamespace, "",
"data_cache_misses_total",
"Number of data cache misses.",
nil,
[]metrics.ConstrainedLabel{operationLabel},
nil,
), nil)
)

// newCacheCollector returns a collector that emits the data_cache_capacity
// gauge at collection time. The reported value is the size the package-level
// data cache was created with, or 0 while that cache has not been initialized.
func newCacheCollector() prometheus.Collector {
	collect := func(ch chan<- prometheus.Metric) {
		var capacity float64
		if dataCache != nil {
			capacity = float64(dataCache.size)
		}
		ch <- dataCacheCapacity.MustMetric(capacity)
	}
	return metrics.NewCustomCollector(
		metrics.CustomMetrics{dataCacheCapacity},
		collect,
		nil,
	)
}

// RegisterHealthMetrics registers the observer's ring buffer, events queue and
// data cache metrics, including the capacity collector, with group.
func RegisterHealthMetrics(group metrics.Group) {
	group.MustRegister(
		RingbufReceived,
		RingbufLost,
		RingbufErrors,
		queueReceived,
		queueLost,
		dataCacheTotal,
		dataCacheEvictions,
		dataCacheMisses,
		newCacheCollector(),
	)
}

0 comments on commit 6f1f0ce

Please sign in to comment.