From b0d109edec95eef6b4d70fd2a84b69fefcfb6712 Mon Sep 17 00:00:00 2001
From: Ceridwen Coghlan
Date: Fri, 9 Jun 2023 20:45:22 +0000
Subject: [PATCH 1/2] alert on remaining disk instead of utilization

Signed-off-by: Ceridwen Coghlan
---
 .../gcp/modules/monitoring/infra/alerts.tf    | 31 ++++++++++---------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/terraform/gcp/modules/monitoring/infra/alerts.tf b/terraform/gcp/modules/monitoring/infra/alerts.tf
index e54afb6e..202da6f7 100644
--- a/terraform/gcp/modules/monitoring/infra/alerts.tf
+++ b/terraform/gcp/modules/monitoring/infra/alerts.tf
@@ -108,7 +108,7 @@ resource "google_monitoring_alert_policy" "cloud_sql_memory_utilization" {
   project = var.project_id
 }
 
-# Cloud SQL Database Disk Utilization > 98%
+# Cloud SQL Database Disk has < 20GiB Free
 resource "google_monitoring_alert_policy" "cloud_sql_disk_utilization" {
   # In the absence of data, incident will auto-close in 7 days
   alert_strategy {
@@ -118,30 +118,31 @@ resource "google_monitoring_alert_policy" "cloud_sql_disk_utilization" {
   combiner = "OR"
 
   conditions {
-    condition_threshold {
-      aggregations {
-        alignment_period   = "300s"
-        per_series_aligner = "ALIGN_MEAN"
-      }
-
-      comparison      = "COMPARISON_GT"
-      duration        = "0s"
-      filter          = "metric.type=\"cloudsql.googleapis.com/database/disk/utilization\" resource.type=\"cloudsql_database\""
-      threshold_value = "0.98"
-
+    condition_monitoring_query_language {
+      duration = "0s"
+      query    = <<-EOT
+      fetch cloudsql_database
+      | { bytes: metric 'cloudsql.googleapis.com/database/disk/bytes_used'
+      ; quota: metric 'cloudsql.googleapis.com/database/disk/quota' }
+      | join
+      | group_by 5m, [q_mean: mean(value.quota), b_mean: mean(value.bytes_used)]
+      | every 5m
+      | group_by [resource.database_id], [free_space: sub(mean(q_mean), mean(b_mean))]
+      | condition free_space < 20 'GiBy'
+      EOT
       trigger {
         count   = "1"
         percent = "0"
       }
     }
 
-    display_name = "Cloud SQL Database - Disk utilization [MEAN]"
+    display_name = "Cloud SQL Database - Disk free space [MEAN]"
   }
 
-  display_name = "Cloud Sql Disk Utilization > 98%"
+  display_name = "Cloud SQL Database Disk has < 20GiB Free"
 
   documentation {
-    content   = "Cloud SQL disk utilization is > 98%. Please increase capacity. Note that autoresize should be enabled for the database. Ensure there is no issue with the autoresize process."
+    content   = "Cloud SQL disk has less than 20GiB free space remaining. Please increase capacity. Note that autoresize should be enabled for the database. Ensure there is no issue with the autoresize process."
     mime_type = "text/markdown"
   }
 
From d77a5be86a41c189d34991f151479fe9ead20734 Mon Sep 17 00:00:00 2001
From: Ceridwen Coghlan
Date: Fri, 9 Jun 2023 22:21:07 +0000
Subject: [PATCH 2/2] combining with 98% threshold

Signed-off-by: Ceridwen Coghlan
---
Review notes (ignored by git am):
- The utilization threshold now lives in its own `conditions` block. A single
  `conditions` block may carry only one condition type, and `combiner = "AND"`
  needs two separate conditions to combine.
- The post-image blob id on the `index` line below predates this change;
  regenerate the series with `git format-patch` before relying on `--3way`.

 .../gcp/modules/monitoring/infra/alerts.tf    | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/terraform/gcp/modules/monitoring/infra/alerts.tf b/terraform/gcp/modules/monitoring/infra/alerts.tf
index 202da6f7..84dc46ad 100644
--- a/terraform/gcp/modules/monitoring/infra/alerts.tf
+++ b/terraform/gcp/modules/monitoring/infra/alerts.tf
@@ -115,9 +115,10 @@ resource "google_monitoring_alert_policy" "cloud_sql_disk_utilization" {
     auto_close = "604800s"
   }
 
-  combiner = "OR"
+  combiner = "AND"
 
   conditions {
+    # < 20GiB disk space free
     condition_monitoring_query_language {
       duration = "0s"
       query    = <<-EOT
@@ -139,6 +140,28 @@ resource "google_monitoring_alert_policy" "cloud_sql_disk_utilization" {
     display_name = "Cloud SQL Database - Disk free space [MEAN]"
   }
 
+  # AND disk utilization > 98%
+  conditions {
+    condition_threshold {
+      aggregations {
+        alignment_period   = "300s"
+        per_series_aligner = "ALIGN_MEAN"
+      }
+
+      comparison      = "COMPARISON_GT"
+      duration        = "0s"
+      filter          = "metric.type=\"cloudsql.googleapis.com/database/disk/utilization\" resource.type=\"cloudsql_database\""
+      threshold_value = "0.98"
+
+      trigger {
+        count   = "1"
+        percent = "0"
+      }
+    }
+
+    display_name = "Cloud SQL Database - Disk utilization [MEAN]"
+  }
+
   display_name = "Cloud SQL Database Disk has < 20GiB Free"
 
   documentation {