From 4321ec83345342a65f8016542fa7c6efec5de2fe Mon Sep 17 00:00:00 2001 From: chrisronline Date: Wed, 23 Jun 2021 07:24:52 -0400 Subject: [PATCH] Add in task type to the message --- .../server/lib/log_health_metrics.test.ts | 16 ++++++++++++++-- .../server/lib/log_health_metrics.ts | 14 +++++++++++++- .../server/monitoring/task_run_statistics.ts | 3 +++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/x-pack/plugins/task_manager/server/lib/log_health_metrics.test.ts b/x-pack/plugins/task_manager/server/lib/log_health_metrics.test.ts index 61a5dfcf962334..85a3009c96aeb6 100644 --- a/x-pack/plugins/task_manager/server/lib/log_health_metrics.test.ts +++ b/x-pack/plugins/task_manager/server/lib/log_health_metrics.test.ts @@ -195,6 +195,11 @@ describe('logHealthMetrics', () => { stats: { runtime: { value: { + drift_by_type: { + 'taskType:test': { + p99: 60000, + }, + }, drift: { p99: 60000, }, @@ -206,7 +211,7 @@ describe('logHealthMetrics', () => { logHealthMetrics(health, logger, config); expect((logger as jest.Mocked).warn.mock.calls[0][0] as string).toBe( - `Detected delay task start of 60s (which exceeds configured value of 60s)` + `Detected delay task start of 60s for task \"taskType:test\" (which exceeds configured value of 60s)` ); const secondMessage = JSON.parse( @@ -326,7 +331,14 @@ function getMockMonitoredHealth(overrides = {}): MonitoredHealth { p95: 2500, p99: 3000, }, - drift_by_type: {}, + drift_by_type: { + 'taskType:test': { + p50: 1000, + p90: 2000, + p95: 2500, + p99: 3000, + }, + }, load: { p50: 1000, p90: 2000, diff --git a/x-pack/plugins/task_manager/server/lib/log_health_metrics.ts b/x-pack/plugins/task_manager/server/lib/log_health_metrics.ts index 8904d7abd58db0..eceeaa26394d39 100644 --- a/x-pack/plugins/task_manager/server/lib/log_health_metrics.ts +++ b/x-pack/plugins/task_manager/server/lib/log_health_metrics.ts @@ -49,8 +49,20 @@ export function logHealthMetrics( if ( driftInSeconds >= config.monitored_stats_health_verbose_log.warn_delayed_task_start_in_seconds ) { + const taskType = Object.keys(monitoredHealth.stats.runtime?.value.drift_by_type ?? {}).reduce( + (accum: string, typeName) => { + if ( + monitoredHealth.stats.runtime?.value.drift_by_type[typeName].p99 === + monitoredHealth.stats.runtime?.value.drift.p99 + ) { + accum = typeName; + } + return accum; + }, + 'unknown' + ); logger.warn( - `Detected delay task start of ${driftInSeconds}s (which exceeds configured value of ${config.monitored_stats_health_verbose_log.warn_delayed_task_start_in_seconds}s)` + `Detected delay task start of ${driftInSeconds}s for task "${taskType}" (which exceeds configured value of ${config.monitored_stats_health_verbose_log.warn_delayed_task_start_in_seconds}s)` ); logLevel = LogLevel.Warn; } diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index b792f4ca475f93..da86cfad2a911e 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -103,6 +103,9 @@ type ResultFrequencySummary = ResultFrequency & { export interface SummarizedTaskRunStat extends JsonObject { drift: AveragedStat; + drift_by_type: { + [alertType: string]: AveragedStat; + }; load: AveragedStat; execution: { duration: Record;