Skip to content

Commit

Permalink
pmem-csi-driver: refactor storage capacity metrics code
Browse files Browse the repository at this point in the history
By moving the code into the device manager package it becomes
reusable, for example in upcoming tests.

While at it, the resync period gets moved and increased. The code
shouldn't depend on resyncing to catch up with the current state of
the world.
  • Loading branch information
pohly committed Jan 15, 2021
1 parent 09cc782 commit d7f67e1
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 80 deletions.
86 changes: 12 additions & 74 deletions pkg/pmem-csi-driver/pmem-csi-driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ const (
connectionTimeout time.Duration = 10 * time.Second
retryTimeout time.Duration = 10 * time.Second
requestTimeout time.Duration = 10 * time.Second

// Resyncing should never be needed for correct operation,
// so this is so high that it shouldn't matter in practice.
resyncPeriod = 10000 * time.Hour
)

type DriverMode string
Expand Down Expand Up @@ -86,27 +90,6 @@ var (
},
[]string{"version"},
)

pmemMaxDesc = prometheus.NewDesc(
"pmem_amount_max_volume_size",
"The size of the largest PMEM volume that can be created.",
nil, nil,
)
pmemAvailableDesc = prometheus.NewDesc(
"pmem_amount_available",
"Remaining amount of PMEM on the host that can be used for new volumes.",
nil, nil,
)
pmemManagedDesc = prometheus.NewDesc(
"pmem_amount_managed",
"Amount of PMEM on the host that is managed by PMEM-CSI.",
nil, nil,
)
pmemTotalDesc = prometheus.NewDesc(
"pmem_amount_total",
"Total amount of PMEM on the host.",
nil, nil,
)
)

func init() {
Expand Down Expand Up @@ -165,47 +148,6 @@ type csiDriver struct {
gatherers prometheus.Gatherers
}

// deviceManagerCollector is a wrapper around a PMEM device manager which
// takes GetCapacity values and turns them into metrics data.
type deviceManagerCollector struct {
pmdmanager.PmemDeviceManager
}

// Describe implements prometheus.Collector.Describe.
func (dm deviceManagerCollector) Describe(ch chan<- *prometheus.Desc) {
prometheus.DescribeByCollect(dm, ch)
}

// Collect implements prometheus.Collector.Collect.
func (dm deviceManagerCollector) Collect(ch chan<- prometheus.Metric) {
capacity, err := dm.GetCapacity()
if err != nil {
return
}
ch <- prometheus.MustNewConstMetric(
pmemMaxDesc,
prometheus.GaugeValue,
float64(capacity.MaxVolumeSize),
)
ch <- prometheus.MustNewConstMetric(
pmemAvailableDesc,
prometheus.GaugeValue,
float64(capacity.Available),
)
ch <- prometheus.MustNewConstMetric(
pmemManagedDesc,
prometheus.GaugeValue,
float64(capacity.Managed),
)
ch <- prometheus.MustNewConstMetric(
pmemTotalDesc,
prometheus.GaugeValue,
float64(capacity.Total),
)
}

var _ prometheus.Collector = deviceManagerCollector{}

func GetCSIDriver(cfg Config) (*csiDriver, error) {
var serverConfig *tls.Config
var clientConfig *tls.Config
Expand Down Expand Up @@ -322,8 +264,11 @@ func (csid *csiDriver) Run() error {
}

// Also run scheduler extender?
if _, err := csid.startScheduler(ctx, cancel, rs); err != nil {
return err
if csid.cfg.schedulerListen != "" {
c := scheduler.CapacityViaRegistry(rs)
if _, err := csid.startScheduler(ctx, cancel, c); err != nil {
return err
}
}
case Node:
dm, err := pmdmanager.New(csid.cfg.DeviceManager, csid.cfg.PmemPercentage)
Expand Down Expand Up @@ -364,9 +309,7 @@ func (csid *csiDriver) Run() error {
}

// Also collect metrics data via the device manager.
prometheus.WrapRegistererWith(prometheus.Labels{registryserver.NodeLabel: csid.cfg.NodeID}, prometheus.DefaultRegisterer).MustRegister(
deviceManagerCollector{dm},
)
pmdmanager.CapacityCollector{PmemDeviceCapacity: dm}.MustRegister(prometheus.DefaultRegisterer, csid.cfg.NodeID, csid.cfg.DriverName)
default:
return fmt.Errorf("Unsupported device mode '%v", csid.cfg.Mode)
}
Expand Down Expand Up @@ -428,18 +371,13 @@ func (csid *csiDriver) registerNodeController() error {
// logs errors and cancels the context when it runs into a problem,
// either during the startup phase (blocking) or later at runtime (in
// a go routine).
func (csid *csiDriver) startScheduler(ctx context.Context, cancel func(), rs *registryserver.RegistryServer) (string, error) {
if csid.cfg.schedulerListen == "" {
return "", nil
}

resyncPeriod := 1 * time.Hour
func (csid *csiDriver) startScheduler(ctx context.Context, cancel func(), c scheduler.Capacity) (string, error) {
factory := informers.NewSharedInformerFactory(csid.cfg.client, resyncPeriod)
pvcLister := factory.Core().V1().PersistentVolumeClaims().Lister()
scLister := factory.Storage().V1().StorageClasses().Lister()
sched, err := scheduler.NewScheduler(
csid.cfg.DriverName,
scheduler.CapacityViaRegistry(rs),
c,
csid.cfg.client,
pvcLister,
scLister,
Expand Down
86 changes: 86 additions & 0 deletions pkg/pmem-device-manager/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
Copyright 2020 Intel Corporation.
SPDX-License-Identifier: Apache-2.0
*/

package pmdmanager

import (
"github.com/prometheus/client_golang/prometheus"

"github.com/intel/pmem-csi/pkg/registryserver"
)

// Metric descriptors for the PMEM capacity samples that
// CapacityCollector.Collect emits. They are created without variable
// or constant labels of their own (both label arguments are nil);
// the node and driver labels are attached by the wrapped registerer
// set up in CapacityCollector.MustRegister.
var (
// pmemMaxDesc describes the sample carrying Capacity.MaxVolumeSize.
pmemMaxDesc = prometheus.NewDesc(
"pmem_amount_max_volume_size",
"The size of the largest PMEM volume that can be created.",
nil, nil,
)
// pmemAvailableDesc describes the sample carrying Capacity.Available.
pmemAvailableDesc = prometheus.NewDesc(
"pmem_amount_available",
"Remaining amount of PMEM on the host that can be used for new volumes.",
nil, nil,
)
// pmemManagedDesc describes the sample carrying Capacity.Managed.
pmemManagedDesc = prometheus.NewDesc(
"pmem_amount_managed",
"Amount of PMEM on the host that is managed by PMEM-CSI.",
nil, nil,
)
// pmemTotalDesc describes the sample carrying Capacity.Total.
pmemTotalDesc = prometheus.NewDesc(
"pmem_amount_total",
"Total amount of PMEM on the host.",
nil, nil,
)
)

// CapacityCollector is a Prometheus collector which takes the values
// returned by GetCapacity and turns them into metrics data.
//
// It embeds PmemDeviceCapacity, so anything that provides GetCapacity
// can serve as the data source: a full PMEM device manager (the
// PmemDeviceManager interface embeds PmemDeviceCapacity) or a plain
// Capacity value.
type CapacityCollector struct {
PmemDeviceCapacity
}

// MustRegister registers the collector with reg through a wrapping
// registerer, so that every sample produced by the collector carries
// the node name and the driver name as labels. Registration itself is
// delegated to the wrapped registerer's MustRegister.
func (cc CapacityCollector) MustRegister(reg prometheus.Registerer, nodeName, driverName string) {
	wrapped := prometheus.WrapRegistererWith(
		prometheus.Labels{
			registryserver.NodeLabel: nodeName,
			// same label name as in csi-lib-utils for CSI gRPC calls
			"driver_name": driverName,
		},
		reg,
	)
	wrapped.MustRegister(cc)
}

// Describe implements prometheus.Collector.Describe.
//
// The descriptors are not listed by hand. The call is delegated to
// prometheus.DescribeByCollect, which derives them from the metrics
// that Collect sends. Collect sends nothing when GetCapacity returns
// an error, so in that case no descriptors are reported either.
func (cc CapacityCollector) Describe(ch chan<- *prometheus.Desc) {
prometheus.DescribeByCollect(cc, ch)
}

// Collect implements prometheus.Collector.Collect.
//
// It queries the capacity once and sends one gauge sample per capacity
// field, in the order max volume size, available, managed, total. When
// the capacity cannot be determined, no samples are sent.
func (cc CapacityCollector) Collect(ch chan<- prometheus.Metric) {
	current, err := cc.GetCapacity()
	if err != nil {
		// The Collector interface has no error return; emit nothing.
		return
	}

	// Each descriptor paired with the capacity field it reports.
	samples := []struct {
		desc  *prometheus.Desc
		value float64
	}{
		{pmemMaxDesc, float64(current.MaxVolumeSize)},
		{pmemAvailableDesc, float64(current.Available)},
		{pmemManagedDesc, float64(current.Managed)},
		{pmemTotalDesc, float64(current.Total)},
	}
	for _, sample := range samples {
		ch <- prometheus.MustNewConstMetric(sample.desc, prometheus.GaugeValue, sample.value)
	}
}

// Compile-time check that CapacityCollector implements prometheus.Collector.
var _ prometheus.Collector = CapacityCollector{}
17 changes: 14 additions & 3 deletions pkg/pmem-device-manager/pmd-manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,25 @@ type Capacity struct {
Total uint64
}

// GetCapacity returns the receiver unchanged together with a nil error.
// This lets a plain Capacity value be used wherever a PmemDeviceCapacity
// is expected.
func (c Capacity) GetCapacity() (Capacity, error) {
return c, nil
}

// Compile-time check that Capacity implements PmemDeviceCapacity.
var _ PmemDeviceCapacity = Capacity{}

// PmemDeviceCapacity is the read-only subset of a device manager: it just
// returns capacity information. PmemDeviceManager embeds it, and a plain
// Capacity value implements it as well.
type PmemDeviceCapacity interface {
// GetCapacity returns information about local capacity.
GetCapacity() (Capacity, error)
}

// PmemDeviceManager is the interface to manage the PMEM block devices.
type PmemDeviceManager interface {
PmemDeviceCapacity

// GetMode returns the current device manager's operation mode
GetMode() api.DeviceMode

// GetCapacity returns information about local capacity.
GetCapacity() (Capacity, error)

// CreateDevice creates a new block device with the given name, size and namespace mode
// Possible errors: ErrNotEnoughSpace, ErrDeviceExists
CreateDevice(name string, size uint64) error
Expand Down
6 changes: 3 additions & 3 deletions pkg/scheduler/capacity.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ import (
"github.com/intel/pmem-csi/pkg/registryserver"
)

type capacity struct {
type capacityFromRegistry struct {
rs *registryserver.RegistryServer
}

func CapacityViaRegistry(rs *registryserver.RegistryServer) Capacity {
return capacity{rs}
return capacityFromRegistry{rs}
}

// NodeCapacity implements the necessary method for the Capacity interface based
// on a registry server.
func (c capacity) NodeCapacity(nodeName string) (int64, error) {
func (c capacityFromRegistry) NodeCapacity(nodeName string) (int64, error) {
conn, err := c.rs.ConnectToNodeController(nodeName)
if err != nil {
return 0, fmt.Errorf("connect to PMEM-CSI on node %q: %v", nodeName, err)
Expand Down

0 comments on commit d7f67e1

Please sign in to comment.