From cebe349250f663e3e634ab22d78f7b6e45e60a27 Mon Sep 17 00:00:00 2001 From: Devrim Demiroz <62701796+devrimdemiroz@users.noreply.github.com> Date: Fri, 10 Mar 2023 08:26:46 +0100 Subject: [PATCH] spanmetrics dashboard premiere (#787) * spanmetrics dashboard premiere * cleaned up unnecessary datasources mistake * pr #787 added to changelog * markdown check fix --------- Co-authored-by: Juliano Costa --- CHANGELOG.md | 3 +- .../general/spanmetrics-dashboard.json | 1003 +++++++++++++++++ 2 files changed, 1005 insertions(+), 1 deletion(-) create mode 100644 src/grafana/provisioning/dashboards/general/spanmetrics-dashboard.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f5c512cff..0cf25d5f1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ release. ## Unreleased -* +* spanmetrics dashboard service & operation rates & latencies +([#787](https://github.com/open-telemetry/opentelemetry-demo/pull/787)) ## v0.1.0 diff --git a/src/grafana/provisioning/dashboards/general/spanmetrics-dashboard.json b/src/grafana/provisioning/dashboards/general/spanmetrics-dashboard.json new file mode 100644 index 0000000000..187a0ab13d --- /dev/null +++ b/src/grafana/provisioning/dashboards/general/spanmetrics-dashboard.json @@ -0,0 +1,1003 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Spanmetrics view of the demo application.", + "author": { + "name": "devrimdemiroz" + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 24, + "panels": [], + "title": "Service Level - Throughput and Latencies", 
+ "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "green", + "value": 2 + }, + { + "color": "#EAB839", + "value": 64 + }, + { + "color": "orange", + "value": 128 + }, + { + "color": "red", + "value": 256 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 20, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 2, + "interval": "5m", + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(7,histogram_quantile(0.50, sum(rate(latency_bucket{service_name=~\"$service\", operation=~\"$operation\"}[$__rate_interval])) by (le,service_name)))", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "legendFormat": "{{service_name}}-quantile_0.50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(7,histogram_quantile(0.95, sum(rate(latency_bucket{service_name=~\"$service\", operation=~\"$operation\"}[$__range])) by (le,service_name)))", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{service_name}}", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile(0.99, sum(rate(latency_bucket{service_name=~\"$service\", 
operation=~\"$operation\"}[$__rate_interval])) by (le,service_name))", + "hide": true, + "interval": "", + "legendFormat": "quantile99", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile(0.999, sum(rate(latency_bucket{service_name=~\"$service\", operation=~\"$operation\"}[$__rate_interval])) by (le,service_name))", + "hide": true, + "interval": "", + "legendFormat": "quantile999", + "range": true, + "refId": "D" + } + ], + "title": "Top 3x3 - Service Latency - quantile95", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "super-light-blue", + "value": 1 + }, + { + "color": "#EAB839", + "value": 2 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "interval": "5m", + "options": { + "displayMode": "lcd", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(7,sum by (service_name) (rate( calls_total{service_name=~\"$service\", operation=~\"$operation\"}[$__range])))", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "{{service_name}}", + "range": false, + "refId": "A" + } + ], + "title": "Top 7 Services Mean Rate over Range", + "transformations": [], + "type": "bargauge" 
+ }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-reds" + }, + "decimals": 4, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "red", + "value": 15 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 15, + "interval": "5m", + "options": { + "displayMode": "lcd", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(7,sum(rate( calls_total{status_code=\"STATUS_CODE_ERROR\",service_name=~\"$service\", operation=~\"$operation\"}[$__range])) by (service_name))", + "instant": true, + "interval": "", + "legendFormat": "{{service_name}}", + "range": false, + "refId": "A" + } + ], + "title": "Top 7 Services Mean ERROR Rate over Range", + "transformations": [], + "type": "bargauge" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 14, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "refId": "A" + } + ], + "title": "Operations Level - Throughput", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "displayMode": "auto", + "inspect": false + }, + "decimals": 2, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "bRate" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "lcd-gauge" + }, + { + "id": "color", + "value": { + "mode": "continuous-BlYlRd" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "eRate" + }, + "properties": [ + { + "id": "custom.displayMode", + "value": "lcd-gauge" + }, + { + "id": "color", + "value": { + "mode": "continuous-RdYlGr" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Error Rate" + }, + "properties": [ + { + "id": "custom.width", + "value": 663 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Rate" + }, + "properties": [ + { + "id": "custom.width", + "value": 667 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Service" + }, + "properties": [ + { + "id": "custom.width", + "value": null + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 22, + "interval": "5m", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "exemplar": false, + "expr": "topk(7, sum(rate(calls_total{service_name=~\"$service\", operation=~\"$operation\"}[$__range])) by (operation,service_name)) ", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "Rate" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "exemplar": false, + "expr": "topk(7, sum(rate(calls_total{status_code=\"STATUS_CODE_ERROR\",service_name=~\"$service\", operation=~\"$operation\"}[$__range])) by (operation,service_name))", + "format": "table", + "hide": false, + "instant": 
true, + "interval": "", + "legendFormat": "", + "refId": "Error Rate" + } + ], + "title": "Top 7 Operations and Errors (APM Table)", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "operation" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time 1": true, + "Time 2": true + }, + "indexByName": {}, + "renameByName": { + "Value #Error Rate": "Error Rate", + "Value #Rate": "Rate", + "service_name 1": "Rate in Service", + "service_name 2": "Error Rate in Service" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "bRate", + "mode": "reduceRow", + "reduce": { + "include": [ + "Rate" + ], + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "eRate", + "mode": "reduceRow", + "reduce": { + "include": [ + "Error Rate" + ], + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Error Rate": true, + "Rate": true, + "bRate": false + }, + "indexByName": { + "Error Rate": 4, + "Error Rate in Service": 6, + "Rate": 1, + "Rate in Service": 5, + "bRate": 2, + "eRate": 3, + "operation": 0 + }, + "renameByName": { + "Rate in Service": "Service", + "bRate": "Rate", + "eRate": "Error Rate", + "operation": "Operation Name" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Rate" + } + ] + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 20, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "refId": "A" + } + ], + "title": "Operation Level - Latencies", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "mappings": [], + "thresholds": { 
+ "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "green", + "value": 2 + }, + { + "color": "#EAB839", + "value": 64 + }, + { + "color": "orange", + "value": 128 + }, + { + "color": "red", + "value": 256 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 25, + "interval": "5m", + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(7,histogram_quantile(0.50, sum(rate(latency_bucket{service_name=~\"$service\", operation=~\"$operation\"}[$__rate_interval])) by (le,service_name)))", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "legendFormat": "{{service_name}}-quantile_0.50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(7,histogram_quantile(0.95, sum(rate(latency_bucket{service_name=~\"$service\", operation=~\"$operation\"}[$__range])) by (le,operation)))", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{operation}}", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": false, + "expr": "histogram_quantile(0.99, sum(rate(latency_bucket{service_name=~\"$service\", operation=~\"$operation\"}[$__rate_interval])) by (le,service_name))", + "hide": true, + "interval": "", + "legendFormat": "quantile99", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", 
+ "exemplar": false, + "expr": "histogram_quantile(0.999, sum(rate(latency_bucket{service_name=~\"$service\", operation=~\"$operation\"}[$__rate_interval])) by (le,service_name))", + "hide": true, + "interval": "", + "legendFormat": "quantile999", + "range": true, + "refId": "D" + } + ], + "title": "Top 3x3 - Operation Latency - quantile95", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 10, + "interval": "5m", + "options": { + "displayMode": "lcd", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(7, sum by (operation,service_name)(increase(latency_sum{service_name=~\"${service}\", operation=~\"$operation\"}[5m]) / increase(latency_count{service_name=~\"${service}\",operation=~\"$operation\"}[5m])))", + "instant": true, + "interval": "", + "legendFormat": "{{operation}} [{{service_name}}]", + "range": false, + "refId": "A" + } + ], + "title": "Top 7 Highest Endpoint Latencies Mean Over Range", + "transformations": [], + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 47 + }, + "id": 16, + "interval": "5m", + "options": { + "legend": { + "calcs": [ + "mean", + "logmin", + "max", + "delta" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "editorMode": "code", + "exemplar": true, + "expr": "topk(7,sum by (operation,service_name)(increase(latency_sum{service_name=~\"$service\", operation=~\"$operation\"}[$__rate_interval]) / increase(latency_count{service_name=~\"$service\", operation=~\"$operation\"}[$__rate_interval])))", + "instant": false, + "interval": "", + "legendFormat": "[{{service_name}}] {{operation}}", + "range": true, + "refId": "A" + } + ], + "title": "Top 7 Latencies Over Range ", + "type": "timeseries" + } + ], + "refresh": "5m", + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "definition": "query_result(count by (service_name)(count_over_time(calls_total[$__range])))", + "hide": 0, + "includeAll": true, + 
"multi": true, + "name": "service", + "options": [], + "query": { + "query": "query_result(count by (service_name)(count_over_time(calls_total[$__range])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "/.*service_name=\"(.*)\".*/", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "webstore-metrics" + }, + "definition": "query_result(sum ({__name__=~\".*calls_total\",service_name=~\"$service\"}) by (operation))", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "operation", + "options": [], + "query": { + "query": "query_result(sum ({__name__=~\".*calls_total\",service_name=~\"$service\"}) by (operation))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "/.*operation=\"(.*)\".*/", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Spanmetrics Demo Dashboard", + "uid": "W2gX2zHVk48", + "version": 1, + "weekStart": "" +}