Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Fixes anomaly chart and validation for one week bucket span #69671

Merged
merged 2 commits into from
Jun 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions x-pack/plugins/ml/common/util/job_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import _ from 'lodash';
import semver from 'semver';
import { Duration } from 'moment';
// @ts-ignore
import numeral from '@elastic/numeral';

Expand Down Expand Up @@ -433,7 +434,7 @@ export function basicJobValidation(
messages.push({ id: 'bucket_span_empty' });
valid = false;
} else {
if (isValidTimeFormat(job.analysis_config.bucket_span)) {
if (isValidTimeInterval(job.analysis_config.bucket_span)) {
messages.push({
id: 'bucket_span_valid',
bucketSpan: job.analysis_config.bucket_span,
Expand Down Expand Up @@ -490,14 +491,14 @@ export function basicDatafeedValidation(datafeed: Datafeed): ValidationResults {

if (datafeed) {
let queryDelayMessage = { id: 'query_delay_valid' };
if (isValidTimeFormat(datafeed.query_delay) === false) {
if (isValidTimeInterval(datafeed.query_delay) === false) {
queryDelayMessage = { id: 'query_delay_invalid' };
valid = false;
}
messages.push(queryDelayMessage);

let frequencyMessage = { id: 'frequency_valid' };
if (isValidTimeFormat(datafeed.frequency) === false) {
if (isValidTimeInterval(datafeed.frequency) === false) {
frequencyMessage = { id: 'frequency_invalid' };
valid = false;
}
Expand Down Expand Up @@ -591,12 +592,33 @@ export function validateGroupNames(job: Job): ValidationResults {
};
}

function isValidTimeFormat(value: string | undefined): boolean {
/**
* Parses the supplied string to a time interval suitable for use in an ML anomaly
* detection job or datafeed.
* @param value the string to parse
* @return {Duration} the parsed interval, or null if it does not represent a valid
* time interval.
*/
export function parseTimeIntervalForJob(value: string | undefined): Duration | null {
if (value === undefined) {
return null;
}

// Must be a valid interval, greater than zero,
// and if specified in ms must be a multiple of 1000ms.
const interval = parseInterval(value, true);
return interval !== null && interval.asMilliseconds() !== 0 && interval.milliseconds() === 0
? interval
: null;
}

// Checks that the value for a field which represents a time interval,
// such as a job bucket span or datafeed query delay, is valid.
function isValidTimeInterval(value: string | undefined): boolean {
if (value === undefined) {
return true;
}
const interval = parseInterval(value);
return interval !== null && interval.asMilliseconds() !== 0;
return parseTimeIntervalForJob(value) !== null;
}

// Returns the latest of the last source data and last processed bucket timestamp,
Expand Down
16 changes: 13 additions & 3 deletions x-pack/plugins/ml/common/util/parse_interval.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import { parseInterval } from './parse_interval';

describe('ML parse interval util', () => {
test('correctly parses an interval containing unit and value', () => {
test('should correctly parse an interval containing a valid unit and value', () => {
expect(parseInterval('1d')!.as('d')).toBe(1);
expect(parseInterval('2y')!.as('y')).toBe(2);
expect(parseInterval('5M')!.as('M')).toBe(5);
Expand All @@ -20,15 +20,25 @@ describe('ML parse interval util', () => {
expect(parseInterval('0s')!.as('h')).toBe(0);
});

test('correctly handles zero value intervals', () => {
test('should correctly handle zero value intervals', () => {
expect(parseInterval('0h')!.as('h')).toBe(0);
expect(parseInterval('0d')).toBe(null);
});

test('returns null for an invalid interval', () => {
test('should return null for an invalid interval', () => {
expect(parseInterval('')).toBe(null);
expect(parseInterval('234asdf')).toBe(null);
expect(parseInterval('m')).toBe(null);
expect(parseInterval('1.5h')).toBe(null);
});

test('should correctly check for whether the interval units are valid Elasticsearch time units', () => {
expect(parseInterval('100s', true)!.as('s')).toBe(100);
expect(parseInterval('5m', true)!.as('m')).toBe(5);
expect(parseInterval('24h', true)!.as('h')).toBe(24);
expect(parseInterval('7d', true)!.as('d')).toBe(7);
expect(parseInterval('1w', true)).toBe(null);
expect(parseInterval('1M', true)).toBe(null);
expect(parseInterval('1y', true)).toBe(null);
});
});
19 changes: 16 additions & 3 deletions x-pack/plugins/ml/common/util/parse_interval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,15 @@ const INTERVAL_STRING_RE = new RegExp('^([0-9]*)\\s*(' + dateMath.units.join('|'
// for units of hour or less.
const SUPPORT_ZERO_DURATION_UNITS: SupportedUnits[] = ['ms', 's', 'm', 'h'];

// List of time units which are supported for use in Elasticsearch durations
// (such as anomaly detection job bucket spans)
// See https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#time-units
const SUPPORT_ES_DURATION_UNITS: SupportedUnits[] = ['ms', 's', 'm', 'h', 'd'];

// Parses an interval String, such as 7d, 1h or 30m to a moment duration.
// Optionally carries out an additional check that the interval is supported as a
// time unit by Elasticsearch, as units greater than 'd' for example cannot be used
// for anomaly detection job bucket spans.
// Differs from the Kibana ui/utils/parse_interval in the following ways:
// 1. A value-less interval such as 'm' is not allowed - in line with the ML back-end
// not accepting such interval Strings for the bucket span of a job.
Expand All @@ -25,7 +33,7 @@ const SUPPORT_ZERO_DURATION_UNITS: SupportedUnits[] = ['ms', 's', 'm', 'h'];
// to work with units less than 'day'.
// 3. Fractional intervals e.g. 1.5h or 4.5d are not allowed, in line with the behaviour
// of the Elasticsearch date histogram aggregation.
export function parseInterval(interval: string): Duration | null {
export function parseInterval(interval: string, checkValidEsUnit = false): Duration | null {
const matches = String(interval).trim().match(INTERVAL_STRING_RE);
if (!Array.isArray(matches) || matches.length < 3) {
return null;
Expand All @@ -36,8 +44,13 @@ export function parseInterval(interval: string): Duration | null {
const unit = matches[2] as SupportedUnits;

// In line with moment.js, only allow zero value intervals when the unit is less than 'day'.
// And check for isNaN as e.g. valueless 'm' will pass the regex test.
if (isNaN(value) || (value < 1 && SUPPORT_ZERO_DURATION_UNITS.indexOf(unit) === -1)) {
// And check for isNaN as e.g. valueless 'm' will pass the regex test,
// plus an optional check that the unit is not w/M/y which are not fully supported by ES.
if (
isNaN(value) ||
(value < 1 && SUPPORT_ZERO_DURATION_UNITS.indexOf(unit) === -1) ||
(checkValidEsUnit === true && SUPPORT_ES_DURATION_UNITS.indexOf(unit) === -1)
) {
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ export class JobCreator {
}

protected _setBucketSpanMs(bucketSpan: BucketSpan) {
const bs = parseInterval(bucketSpan);
const bs = parseInterval(bucketSpan, true);
this._bucketSpanMs = bs === null ? 0 : bs.asMilliseconds();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ export class SingleMetricJobCreator extends JobCreator {
const functionName = this._aggs[0].dslName;
const timeField = this._job_config.data_description.time_field;

const duration = parseInterval(this._job_config.analysis_config.bucket_span);
const duration = parseInterval(this._job_config.analysis_config.bucket_span, true);
if (duration === null) {
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ export function populateValidationMessages(
basicValidations.bucketSpan.message = msg;
} else if (validationResults.contains('bucket_span_invalid')) {
basicValidations.bucketSpan.valid = false;
basicValidations.bucketSpan.message = invalidTimeFormatMessage(
basicValidations.bucketSpan.message = invalidTimeIntervalMessage(
jobConfig.analysis_config.bucket_span
);
}
Expand All @@ -163,12 +163,12 @@ export function populateValidationMessages(

if (validationResults.contains('query_delay_invalid')) {
basicValidations.queryDelay.valid = false;
basicValidations.queryDelay.message = invalidTimeFormatMessage(datafeedConfig.query_delay);
basicValidations.queryDelay.message = invalidTimeIntervalMessage(datafeedConfig.query_delay);
}

if (validationResults.contains('frequency_invalid')) {
basicValidations.frequency.valid = false;
basicValidations.frequency.message = invalidTimeFormatMessage(datafeedConfig.frequency);
basicValidations.frequency.message = invalidTimeIntervalMessage(datafeedConfig.frequency);
}
}

Expand Down Expand Up @@ -202,16 +202,18 @@ export function checkForExistingJobAndGroupIds(
};
}

function invalidTimeFormatMessage(value: string | undefined) {
function invalidTimeIntervalMessage(value: string | undefined) {
return i18n.translate(
'xpack.ml.newJob.wizard.validateJob.frequencyInvalidTimeIntervalFormatErrorMessage',
{
defaultMessage:
'{value} is not a valid time interval format e.g. {tenMinutes}, {oneHour}. It also needs to be higher than zero.',
'{value} is not a valid time interval format e.g. {thirtySeconds}, {tenMinutes}, {oneHour}, {sevenDays}. It also needs to be higher than zero.',
values: {
value,
thirtySeconds: '30s',
tenMinutes: '10m',
oneHour: '1h',
sevenDays: '7d',
},
}
);
Expand Down
12 changes: 9 additions & 3 deletions x-pack/plugins/ml/public/application/util/time_buckets.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ import { getFieldFormats, getUiSettings } from './dependency_cache';
import { FIELD_FORMAT_IDS, UI_SETTINGS } from '../../../../../../src/plugins/data/public';

const unitsDesc = dateMath.unitsDesc;
const largeMax = unitsDesc.indexOf('w'); // Multiple units of week or longer converted to days for ES intervals.

// Index of the list of time interval units at which larger units (i.e. weeks, months, years) need
// need to be converted to multiples of the largest unit supported in ES aggregation intervals (i.e. days).
// Note that similarly the largest interval supported for ML bucket spans is 'd'.
const timeUnitsMaxSupportedIndex = unitsDesc.indexOf('w');

const calcAuto = timeBucketsCalcAutoIntervalProvider();

Expand Down Expand Up @@ -383,9 +387,11 @@ export function calcEsInterval(duration) {
const val = duration.as(unit);
// find a unit that rounds neatly
if (val >= 1 && Math.floor(val) === val) {
// if the unit is "large", like years, but isn't set to 1, ES will throw an error.
// Apart from for date histograms, ES only supports time units up to 'd',
// meaning we can't for example use 'w' for job bucket spans.
// See https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#time-units
// So keep going until we get out of the "large" units.
if (i <= largeMax && val !== 1) {
if (i <= timeUnitsMaxSupportedIndex) {
continue;
}

Expand Down
30 changes: 15 additions & 15 deletions x-pack/plugins/ml/public/application/util/time_buckets.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -232,34 +232,34 @@ describe('ML - time buckets', () => {
expression: '3d',
});
expect(calcEsInterval(moment.duration(7, 'd'))).toEqual({
value: 1,
unit: 'w',
expression: '1w',
value: 7,
unit: 'd',
expression: '7d',
});
expect(calcEsInterval(moment.duration(1, 'w'))).toEqual({
value: 1,
unit: 'w',
expression: '1w',
value: 7,
unit: 'd',
expression: '7d',
});
expect(calcEsInterval(moment.duration(4, 'w'))).toEqual({
value: 28,
unit: 'd',
expression: '28d',
});
expect(calcEsInterval(moment.duration(1, 'M'))).toEqual({
value: 1,
unit: 'M',
expression: '1M',
value: 30,
unit: 'd',
expression: '30d',
});
expect(calcEsInterval(moment.duration(12, 'M'))).toEqual({
value: 1,
unit: 'y',
expression: '1y',
value: 365,
unit: 'd',
expression: '365d',
});
expect(calcEsInterval(moment.duration(1, 'y'))).toEqual({
value: 1,
unit: 'y',
expression: '1y',
value: 365,
unit: 'd',
expression: '365d',
});
});
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,11 @@ describe('ML - validateJob', () => {
});
};
it('invalid bucket span formats', () => {
const invalidBucketSpanFormats = ['a', '10', '$'];
const invalidBucketSpanFormats = ['a', '10', '$', '500ms', '1w', '2M', '1y'];
return bucketSpanFormatTests(invalidBucketSpanFormats, 'bucket_span_invalid');
});
it('valid bucket span formats', () => {
const validBucketSpanFormats = ['1s', '4h', '10d', '6w', '2m', '3y'];
const validBucketSpanFormats = ['5000ms', '1s', '2m', '4h', '10d'];
return bucketSpanFormatTests(validBucketSpanFormats, 'bucket_span_valid');
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
*/

import { estimateBucketSpanFactory } from '../../models/bucket_span_estimator';
import { mlFunctionToESAggregation } from '../../../common/util/job_utils';
import { mlFunctionToESAggregation, parseTimeIntervalForJob } from '../../../common/util/job_utils';
import { SKIP_BUCKET_SPAN_ESTIMATION } from '../../../common/constants/validation';
import { parseInterval } from '../../../common/util/parse_interval';

import { validateJobObject } from './validate_job_object';

Expand Down Expand Up @@ -65,8 +64,11 @@ export async function validateBucketSpan(
}

const messages = [];
const parsedBucketSpan = parseInterval(job.analysis_config.bucket_span);
if (parsedBucketSpan === null || parsedBucketSpan.asMilliseconds() === 0) {

// Bucket span must be a valid interval, greater than 0,
// and if specified in ms must be a multiple of 1000ms
const parsedBucketSpan = parseTimeIntervalForJob(job.analysis_config.bucket_span);
if (parsedBucketSpan === null) {
messages.push({ id: 'bucket_span_invalid' });
return messages;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ export async function validateTimeRange(
}

// check for minimum time range (25 buckets or 2 hours, whichever is longer)
const interval = parseInterval(job.analysis_config.bucket_span);
const interval = parseInterval(job.analysis_config.bucket_span, true);
if (interval === null) {
messages.push({ id: 'bucket_span_invalid' });
} else {
Expand Down
1 change: 0 additions & 1 deletion x-pack/plugins/translations/translations/ja-JP.json
Original file line number Diff line number Diff line change
Expand Up @@ -10709,7 +10709,6 @@
"xpack.ml.newJob.wizard.timeRangeStep.timeRangePicker.startDateLabel": "開始日",
"xpack.ml.newJob.wizard.validateJob.bucketSpanMustBeSetErrorMessage": "バケットスパンを設定する必要があります",
"xpack.ml.newJob.wizard.validateJob.duplicatedDetectorsErrorMessage": "重複する検知器が検出されました。",
"xpack.ml.newJob.wizard.validateJob.frequencyInvalidTimeIntervalFormatErrorMessage": "{value} は有効な時間間隔のフォーマット (例: {tenMinutes}、{oneHour}) ではありません。また、0 よりも大きい数字である必要があります。",
"xpack.ml.newJob.wizard.validateJob.groupNameAlreadyExists": "グループ ID が既に存在します。グループ ID は既存のジョブやグループと同じにできません。",
"xpack.ml.newJob.wizard.validateJob.jobGroupAllowedCharactersDescription": "ジョブグループ名にはアルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーラインが使用でき、最初と最後を英数字にする必要があります",
"xpack.ml.newJob.wizard.validateJob.jobGroupMaxLengthDescription": "ジョブグループ名は {maxLength, plural, one {# 文字} other {# 文字}} 以内でなければなりません。",
Expand Down
1 change: 0 additions & 1 deletion x-pack/plugins/translations/translations/zh-CN.json
Original file line number Diff line number Diff line change
Expand Up @@ -10713,7 +10713,6 @@
"xpack.ml.newJob.wizard.timeRangeStep.timeRangePicker.startDateLabel": "开始日期",
"xpack.ml.newJob.wizard.validateJob.bucketSpanMustBeSetErrorMessage": "必须设置存储桶跨度",
"xpack.ml.newJob.wizard.validateJob.duplicatedDetectorsErrorMessage": "找到重复的检测工具。",
"xpack.ml.newJob.wizard.validateJob.frequencyInvalidTimeIntervalFormatErrorMessage": "{value} 不是有效的时间间隔格式,例如,{tenMinutes}、{oneHour}。还需要大于零。",
"xpack.ml.newJob.wizard.validateJob.groupNameAlreadyExists": "组 ID 已存在。组 ID 不能与现有作业或组相同。",
"xpack.ml.newJob.wizard.validateJob.jobGroupAllowedCharactersDescription": "作业组名称可以包含小写字母数字(a-z 和 0-9)、连字符或下划线;必须以字母数字字符开头和结尾",
"xpack.ml.newJob.wizard.validateJob.jobGroupMaxLengthDescription": "作业组名称的长度不得超过 {maxLength, plural, one {# 个字符} other {# 个字符}}。",
Expand Down