diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..f2d90cb --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.linting.enabled": true +} \ No newline at end of file diff --git a/README.md b/README.md index 8e85b62..a228b4e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # vsanmetrics -vsanmetrics is a tool written in Python for collecting usage and performance metrics from a VMware vSAN cluster and translating them in [InfluxDB's line protocol](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md). +vsanmetrics is a tool written in Python for collecting usage metrics, performance metrics, and health status from a VMware vSAN cluster and translating them into [InfluxDB's line protocol](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md). It can be useful to send metrics in a time-serie database like [InfluxDB](https://www.influxdata.com/) or [Graphite](https://graphiteapp.org/) with the help of [Telegraf](https://www.influxdata.com/time-series-platform/telegraf/) and then display metrics in [Grafana](https://grafana.com/). @@ -32,7 +32,7 @@ A detailed list of all entities types and metrics is available [here](entities.m % ./vsanmetrics -h usage: vsanmetrics.py [-h] -s VCENTER [-o PORT] -u USER [-p PASSWORD] -c - CLUSTERNAME [--performance] [--capacity] + CLUSTERNAME [--performance] [--capacity] [--health] [--skipentitytypes SKIPENTITYTYPES] Export vSAN cluster performance and storage usage statistics to InfluxDB line @@ -50,6 +50,7 @@ optional arguments: Cluster Name --performance Output performance metrics --capacity Output storage usage metrics + --health Output cluster health status --skipentitytypes SKIPENTITYTYPES List of entity types to skip.
Separated by a comma ``` @@ -136,7 +137,7 @@ Here is an example of a working telegraf's config file: [[inputs.exec]] # Shell/commands array # Full command line to executable with parameters, or a glob pattern to run all matching files. - commands = ["/path/to/script/vsanmetrics.py -s vcenter01.example.com -u administrator@vsphere.local -p MyAwesomePassword -c VSAN-CLUSTER --performance --capacity"] + commands = ["/path/to/script/vsanmetrics.py -s vcenter01.example.com -u administrator@vsphere.local -p MyAwesomePassword -c VSAN-CLUSTER --performance --capacity --health"] # Timeout for each command to complete. timeout = "60s" @@ -158,7 +159,7 @@ If needed, you can specify more than one input plugin. It might be useful if you [[inputs.exec]] # Shell/commands array # Full command line to executable with parameters, or a glob pattern to run all matching files. - commands = ["/path/to/script/vsanmetrics.py -s vcenter01.example.com -u administrator@vsphere.local -p MyAwesomePassword -c VSAN-CLUSTER --performance --capacity"] + commands = ["/path/to/script/vsanmetrics.py -s vcenter01.example.com -u administrator@vsphere.local -p MyAwesomePassword -c VSAN-CLUSTER --performance --capacity --health"] # Timeout for each command to complete. timeout = "60s" @@ -172,7 +173,7 @@ If needed, you can specify more than one input plugin. It might be useful if you [[inputs.exec]] # Shell/commands array # Full command line to executable with parameters, or a glob pattern to run all matching files. - commands = ["/path/to/script/vsanmetrics.py -s vcenter02.example.com -u administrator@vsphere.local -p MyAwesomePassword -c VSAN-CLUSTER --performance --capacity"] + commands = ["/path/to/script/vsanmetrics.py -s vcenter02.example.com -u administrator@vsphere.local -p MyAwesomePassword -c VSAN-CLUSTER --performance --capacity --health"] # Timeout for each command to complete. timeout = "60s" @@ -185,6 +186,7 @@ If needed, you can specify more than one input plugin. 
It might be useful if you ``` # Author + **Erwan Quélin** - - diff --git a/vsanmetrics.py b/vsanmetrics.py index 56ae275..84f0168 100644 --- a/vsanmetrics.py +++ b/vsanmetrics.py @@ -3,7 +3,7 @@ # Erwan Quelin - erwan.quelin@gmail.com from pyVim.connect import SmartConnect, Disconnect -from pyVmomi import pbm, VmomiSupport, SoapStubAdapter, vim, vmodl +from pyVmomi import VmomiSupport, SoapStubAdapter, vim, vmodl import argparse import atexit @@ -16,7 +16,7 @@ import vsanmgmtObjects -def get_args(): +def get_args(): parser = argparse.ArgumentParser( description='Export vSAN cluster performance and storage usage statistics to InfluxDB line protocol') @@ -41,8 +41,8 @@ def get_args(): action='store', help='Password to use when connecting to vcenter') - parser.add_argument('-c', '--cluster_name', - dest='clusterName', + parser.add_argument('-c', '--cluster_name', + dest='clusterName', required=True, help='Cluster Name') @@ -54,10 +54,14 @@ def get_args(): help="Output storage usage metrics", action="store_true") + parser.add_argument("--health", + help="Output cluster health status", + action="store_true") + parser.add_argument('--skipentitytypes', - required=False, - action='store', - help='List of entity types to skip. Separated by a comma') + required=False, + action='store', + help='List of entity types to skip. 
Separated by a comma') args = parser.parse_args() @@ -70,21 +74,23 @@ def get_args(): print("You can't skip a performance entity type if you don't provide the --performance tag") exit() - if not args.performance and not args.capacity: - print('Please provide tag(s) --performance and/or --capacity to specify what type of data you want to collect') + if not args.performance and not args.capacity and not args.health: + print('Please provide tag(s) --performance and/or --capacity and/or --health to specify what type of data you want to collect') exit() return args + # Get cluster informations def getClusterInstance(clusterName, content): - searchIndex = content.searchIndex - datacenters = content.rootFolder.childEntity - for datacenter in datacenters: - cluster = searchIndex.FindChild(datacenter.hostFolder, clusterName) - if cluster is not None: - return cluster - return None + searchIndex = content.searchIndex + datacenters = content.rootFolder.childEntity + for datacenter in datacenters: + cluster = searchIndex.FindChild(datacenter.hostFolder, clusterName) + if cluster is not None: + return cluster + return None + def getInformations(witnessHosts, cluster): @@ -92,13 +98,13 @@ def getInformations(witnessHosts, cluster): hostnames = {} disks = {} - ### Get Host and disks informations + # Get Host and disks informations for host in cluster.host: - #Get relationship between host id and hostname + # Get relationship between host id and hostname hostnames[host.summary.host] = host.summary.config.name - #Get all disk (cache and capcity) attached to hosts in the cluster + # Get all disk (cache and capcity) attached to hosts in the cluster diskAll = host.configManager.vsanSystem.QueryDisksForVsan() for disk in diskAll: @@ -109,11 +115,10 @@ def getInformations(witnessHosts, cluster): for vsanHostConfig in cluster.configurationEx.vsanHostConfig: uuid[vsanHostConfig.clusterInfo.nodeUuid] = hostnames[vsanHostConfig.hostSystem] - ### Get witness disks informations - + # Get 
witness disks informations + return uuid, disks - return uuid , disks # Get hosts informations (hostname and disks) def getHostsInfos(cluster): @@ -133,25 +138,28 @@ def getHostsInfos(cluster): for vsanHostConfig in cluster.configurationEx.vsanHostConfig: hostinfos[vsanHostConfig.clusterInfo.nodeUuid] = hostnames[vsanHostConfig.hostSystem] - return disksinfos,hostinfos + return disksinfos, hostinfos + # Get all VM managed by the hosts in the cluster, return array with name and uuid of the VMs -#SOurce: https://github.com/vmware/pyvmomi-community-samples/blob/master/samples/getvmsbycluster.py +# Source: https://github.com/vmware/pyvmomi-community-samples/blob/master/samples/getvmsbycluster.py def getVMs(cluster): - + vms = {} - - for host in cluster.host: # Iterate through Hosts in the Cluster + + for host in cluster.host: # Iterate through Hosts in the Cluster for vm in host.vm: # Iterate through each VM on the host vms[vm.summary.config.instanceUuid] = vm.summary.config.name return vms + # Output data in the Influx Line protocol format -def printInfluxLineProtocol(measurement,tags,fields,timestamp): - result = "%s,%s %s %i" % (measurement,arrayToString(tags),arrayToString(fields),timestamp) +def printInfluxLineProtocol(measurement, tags, fields, timestamp): + result = "%s,%s %s %i" % (measurement, arrayToString(tags), arrayToString(fields), timestamp) print(result) + # Convert time in string format to epoch timestamp (nanosecond) def convertStrToTimestamp(str): sec = time.mktime(datetime.strptime(str, "%Y-%m-%d %H:%M:%S").timetuple()) @@ -160,8 +168,9 @@ def convertStrToTimestamp(str): return ns + # parse EntytyRefID, convert to tags -def parseEntityRefId(measurement,entityRefId,uuid,vms,disks): +def parseEntityRefId(measurement, entityRefId, uuid, vms, disks): tags = {} @@ -183,11 +192,11 @@ def parseEntityRefId(measurement,entityRefId,uuid,vms,disks): if measurement == 'host-domclient': tags['uuid'] = entityRefId[1] - tags['hostname'] = uuid[entityRefId[1]] + 
tags['hostname'] = uuid[entityRefId[1]] if measurement == 'host-domcompmgr': tags['uuid'] = entityRefId[1] - tags['hostname'] = uuid[entityRefId[1]] + tags['hostname'] = uuid[entityRefId[1]] if measurement == 'cache-disk': tags['uuid'] = entityRefId[1] @@ -207,58 +216,60 @@ def parseEntityRefId(measurement,entityRefId,uuid,vms,disks): tags['vmname'] = vms[entityRefId[1]] if measurement == 'virtual-disk': - split = entityRefId[1].split("/") + split = entityRefId[1].split("/") tags['uuid'] = split[0] tags['disk'] = split[1] if measurement == 'vsan-vnic-net': - split = entityRefId[1].split("|") + split = entityRefId[1].split("|") tags['uuid'] = split[0] - tags['hostname'] = uuid[split[0]] + tags['hostname'] = uuid[split[0]] tags['stack'] = split[1] tags['vmk'] = split[2] if measurement == 'vsan-host-net': tags['uuid'] = entityRefId[1] - tags['hostname'] = uuid[entityRefId[1]] + tags['hostname'] = uuid[entityRefId[1]] if measurement == 'vsan-pnic-net': - split = entityRefId[1].split("|") + split = entityRefId[1].split("|") tags['uuid'] = split[0] - tags['hostname'] = uuid[split[0]] + tags['hostname'] = uuid[split[0]] tags['vmnic'] = split[1] if measurement == 'vsan-iscsi-host': tags['uuid'] = entityRefId[1] - tags['hostname'] = uuid[entityRefId[1]] + tags['hostname'] = uuid[entityRefId[1]] if measurement == 'vsan-iscsi-target': tags['uuid'] = entityRefId[1] - tags['hostname'] = uuid[entityRefId[1]] + tags['hostname'] = uuid[entityRefId[1]] if measurement == 'vsan-iscsi-lun': tags['uuid'] = entityRefId[1] - tags['hostname'] = uuid[entityRefId[1]] + tags['hostname'] = uuid[entityRefId[1]] return tags + # Convert array to a string compatible with influxdb line protocol tags or fields def arrayToString(data): i = 0 result = "" - for key,val in data.items(): + for key, val in data.items(): if i == 0: - result = "%s=%s" % (key,val) + result = "%s=%s" % (key, val) else: - result = result + ",%s=%s" % (key,val) - i = i + 1 + result = result + ",%s=%s" % (key, val) + i = i + 1 
return result + def parseVsanObjectSpaceSummary(data): fields = {} @@ -269,12 +280,13 @@ def parseVsanObjectSpaceSummary(data): fields['reservedCapacityB'] = data.reservedCapacityB fields['temporaryOverheadB'] = data.temporaryOverheadB fields['usedB'] = data.usedB - + if data.provisionCapacityB: fields['provisionCapacityB'] = data.provisionCapacityB - + return fields + def parseVimVsanDataEfficiencyCapacityState(data): fields = {} @@ -285,9 +297,10 @@ def parseVimVsanDataEfficiencyCapacityState(data): fields['physicalCapacityUsed'] = data.physicalCapacityUsed fields['ratio'] = float(data.logicalCapacityUsed) / float(data.physicalCapacityUsed) - return fields + return fields + -def parseCapacity(scope,data,tagsbase,timestamp): +def parseCapacity(scope, data, tagsbase, timestamp): tags = {} fields = {} @@ -308,7 +321,30 @@ def parseCapacity(scope,data,tagsbase,timestamp): else: fields = parseVsanObjectSpaceSummary(data) - printInfluxLineProtocol(measurement,tags,fields,timestamp) + printInfluxLineProtocol(measurement, tags, fields, timestamp) + + +def parseHealth(test, value, tagsbase, timestamp): + + measurement = 'health_' + test + + tags = tagsbase + + fields = {} + + if value == 'green': + fields['health'] = 0 + + if value == 'yellow': + fields['health'] = 1 + + if value == 'red': + fields['health'] = 2 + + fields['value'] = '\"' + value + '\"' + + printInfluxLineProtocol(measurement, tags, fields, timestamp) + # Main... 
def main(): @@ -322,10 +358,10 @@ def main(): # Connect to vCenter try: si = SmartConnect(host=args.vcenter, - user=args.user, - pwd=args.password, - port=int(args.port), - sslContext=context) + user=args.user, + pwd=args.password, + port=int(args.port), + sslContext=context) if not si: print("Could not connect to the specified host using specified " "username and password") @@ -346,8 +382,8 @@ def main(): content = si.RetrieveContent() # Get Info about cluster - cluster_obj = getClusterInstance(args.clusterName,content) - + cluster_obj = getClusterInstance(args.clusterName, content) + # Exit if the cluster provided in the arguments is not available if not cluster_obj: print 'The required cluster not found in inventory, validate input.' @@ -361,18 +397,18 @@ def main(): apiVersion = vsanapiutils.GetLatestVmodlVersion(args.vcenter) vcMos = vsanapiutils.GetVsanVcMos(si._stub, context=context, version=apiVersion) - ## CAPACITY + # CAPACITY if args.capacity: vsanSpaceReportSystem = vcMos['vsan-cluster-space-report-system'] try: spaceReport = vsanSpaceReportSystem.VsanQuerySpaceUsage( - cluster = cluster_obj - ) + cluster=cluster_obj + ) except vmodl.fault.InvalidArgument as e: print("Caught InvalidArgument exception : " + str(e)) - return -1 + return -1 except vmodl.fault.NotSupported as e: print("Caught NotSupported exception : " + str(e)) return -1 @@ -383,20 +419,45 @@ def main(): timestamp = int(time.time() * 1000000000) - parseCapacity('global',spaceReport,tagsbase,timestamp) - parseCapacity('summary',spaceReport,tagsbase,timestamp) + parseCapacity('global', spaceReport, tagsbase, timestamp) + parseCapacity('summary', spaceReport, tagsbase, timestamp) if spaceReport.efficientCapacity: - parseCapacity('efficientcapacity',spaceReport,tagsbase,timestamp) + parseCapacity('efficientcapacity', spaceReport, tagsbase, timestamp) for object in spaceReport.spaceDetail.spaceUsageByObjectType: - parseCapacity(object.objType,object,tagsbase,timestamp) + 
parseCapacity(object.objType, object, tagsbase, timestamp) + + # HEALTH + + if args.health: + vsanClusterHealthSystem = vcMos['vsan-cluster-health-system'] + + try: + clusterHealth = vsanClusterHealthSystem.VsanQueryVcClusterHealthSummary( + cluster=cluster_obj + ) + except vmodl.fault.NotFound as e: + print("Caught NotFound exception : " + str(e)) + return -1 + except vmodl.fault.RuntimeFault as e: + print("Caught RuntimeFault exception : " + str(e)) + return -1 - ## PERFORMANCE + timestamp = int(time.time() * 1000000000) + + for group in clusterHealth.groups: + + splitGroupId = group.groupId.split('.') + testName = splitGroupId[-1] + + parseHealth(testName, group.groupHealth, tagsbase, timestamp) + + # PERFORMANCE if args.performance: vsanVcStretchedClusterSystem = vcMos['vsan-stretched-cluster-system'] - vsanPerfSystem = vcMos['vsan-performance-manager'] + vsanPerfSystem = vcMos['vsan-performance-manager'] # Get VM uuid/names vms = getVMs(cluster_obj) @@ -404,14 +465,14 @@ def main(): # Get uuid/names relationship informations for hosts and disks uuid, disks = getInformations(content, cluster_obj) - #### Witness + # Witness # Retrieve Witness Host for given VSAN Cluster witnessHosts = vsanVcStretchedClusterSystem.VSANVcGetWitnessHosts( cluster=cluster_obj - ) + ) for witnessHost in witnessHosts: - host = (vim.HostSystem(witnessHost.host._moId,si._stub)) + host = (vim.HostSystem(witnessHost.host._moId, si._stub)) uuid[witnessHost.nodeUuid] = host.name @@ -425,7 +486,7 @@ def main(): # Gather a list of the available entity types (ex: vsan-host-net) entityTypes = vsanPerfSystem.VsanPerfGetSupportedEntityTypes() - # query interval, last 10 minutes -- UTC !!! + # query interval, last 10 minutes -- UTC !!! 
endTime = datetime.utcnow() startTime = endTime + timedelta(minutes=-10) @@ -437,7 +498,7 @@ def main(): for entities in entityTypes: if entities.name not in splitSkipentitytypes: - + entitieName = entities.name labels = [] @@ -449,22 +510,22 @@ def main(): labels.append(metric.label) - # Build entity + # Build entity entity = '%s:*' % (entities.name) # Build spec object spec = vim.cluster.VsanPerfQuerySpec( - endTime = endTime, - entityRefId = entity, - labels = labels, - startTime = startTime + endTime=endTime, + entityRefId=entity, + labels=labels, + startTime=startTime ) # Get statistics try: metrics = vsanPerfSystem.VsanPerfQueryPerf( - querySpecs = [spec], - cluster = cluster_obj + querySpecs=[spec], + cluster=cluster_obj ) except vmodl.fault.InvalidArgument as e: @@ -495,14 +556,14 @@ def main(): if not metric.sampleInfo == "": - measurement = entitieName + measurement = entitieName sampleInfos = metric.sampleInfo.split(",") lenValues = len(sampleInfos) timestamp = convertStrToTimestamp(sampleInfos[lenValues - 1]) - tags = parseEntityRefId(measurement,metric.entityRefId,uuid,vms,disks) + tags = parseEntityRefId(measurement, metric.entityRefId, uuid, vms, disks) tags.update(tagsbase) @@ -514,7 +575,7 @@ def main(): fields[value.metricId.label] = float(listValue[lenValues - 1]) - printInfluxLineProtocol(measurement,tags,fields,timestamp) + printInfluxLineProtocol(measurement, tags, fields, timestamp) return 0