Skip to content

Commit

Permalink
[ML] Fixing categorization wizard example results (#54924) (#55438)
Browse files Browse the repository at this point in the history
* [ML] Fixing categorization wizard example results

* moving validation results to class

* cleaning up category analyzer types

* small tweaks

* removing commented out code

* fixing string ids

* small refactor

* improving validation messages

* fixing types

* updating message text

* fixing typo

* adding privileges error

* updating privilege message

* changes based on review

* removing old warning message

* fixing translations

* renaming enum
  • Loading branch information
jgowdyelastic committed Jan 21, 2020
1 parent c96ca20 commit 15e584f
Show file tree
Hide file tree
Showing 20 changed files with 763 additions and 420 deletions.
7 changes: 7 additions & 0 deletions x-pack/legacy/plugins/ml/common/constants/new_job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,14 @@ export const DEFAULT_QUERY_DELAY = '60s';

export const SHARED_RESULTS_INDEX_NAME = 'shared';

// Categorization
// Passed by the wizard's CategorizationExamplesLoader to
// ml.jobs.categorizationFieldExamples when requesting example values.
export const NUMBER_OF_CATEGORY_EXAMPLES = 5;
// NOTE(review): presumably the number of field values sampled when validating
// the category field; the consumer is not in this module - confirm.
export const CATEGORY_EXAMPLES_SAMPLE_SIZE = 1000;
// NOTE(review): the two limits below look like thresholds on the fraction (0-1)
// of sampled examples that contain valid tokens: below the warning limit the
// field is only partially valid, below the error limit it is invalid. The
// comparison that uses them is not in this module - confirm against the caller.
export const CATEGORY_EXAMPLES_WARNING_LIMIT = 0.75;
export const CATEGORY_EXAMPLES_ERROR_LIMIT = 0.02;

/**
 * Overall outcome of validating the examples loaded for a categorization field.
 *
 * In the job wizard, INVALID fails the categorization field's advanced
 * validation and produces a danger callout, PARTIALLY_VALID produces a warning
 * callout, and any other value leaves the default success callout in place.
 * The members are string-valued.
 */
export enum CATEGORY_EXAMPLES_VALIDATION_STATUS {
VALID = 'valid',
PARTIALLY_VALID = 'partially_valid',
INVALID = 'invalid',
}
29 changes: 29 additions & 0 deletions x-pack/legacy/plugins/ml/common/types/categories.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* you may not use this file except in compliance with the Elastic License.
*/

import { CATEGORY_EXAMPLES_VALIDATION_STATUS } from '../constants/new_job';

export type CategoryId = number;

export interface Category {
Expand All @@ -23,3 +25,30 @@ export interface Token {
type: string;
position: number;
}

/**
 * Analyzer settings applied when categorizing field values. The categorization
 * job creator writes this object to `analysis_config.categorization_analyzer`
 * of the job config when it differs from the default analyzer.
 *
 * Every property is optional; the job creator uses an empty object (`{}`) to
 * represent "no analyzer set".
 */
export interface CategorizationAnalyzer {
// NOTE(review): element shape is untyped (`any`) - presumably character filter
// names or inline definitions; confirm before tightening the type.
char_filter?: any[];
// When defined, the wizard callout displays this in preference to `analyzer`.
tokenizer?: string;
// NOTE(review): element shape is untyped (`any`) - presumably token filter
// names or inline definitions; confirm before tightening the type.
filter?: any[];
// Displayed by the wizard callout only when `tokenizer` is undefined.
analyzer?: string;
}

/**
 * One example for the selected categorization field, as stored by the
 * categorization job creator after loading examples.
 */
export interface CategoryFieldExample {
// NOTE(review): presumably the raw field value the example was taken from - confirm.
text: string;
// NOTE(review): presumably the tokens the analyzer produced for `text` - confirm.
tokens: Token[];
}

/**
 * Identifies which individual check a FieldExampleCheck reports on (it is the
 * type of FieldExampleCheck.id).
 *
 * This is a numeric enum without initializers, so members take the values 0-5
 * in declaration order; inserting or reordering members changes those values.
 *
 * NOTE(review): the meaning of each member is inferred from its name only; the
 * code that produces these results is not in this file - confirm there.
 */
export enum VALIDATION_RESULT {
TOKEN_COUNT,
MEDIAN_LINE_LENGTH,
NULL_VALUES,
TOO_MANY_TOKENS,
FAILED_TO_TOKENIZE,
INSUFFICIENT_PRIVILEGES,
}

/**
 * Result of a single validation check run against the category field examples.
 */
export interface FieldExampleCheck {
// Which check this result belongs to.
id: VALIDATION_RESULT;
// Outcome of this check. Despite the name this is a three-state status
// (valid / partially valid / invalid), not a boolean.
valid: CATEGORY_EXAMPLES_VALIDATION_STATUS;
// Text shown to the user; the wizard callout renders one line per check.
message: string;
}
24 changes: 23 additions & 1 deletion x-pack/legacy/plugins/ml/common/util/string_utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,17 @@
* you may not use this file except in compliance with the Elastic License.
*/

import { renderTemplate } from './string_utils';
import { renderTemplate, getMedianStringLength } from './string_utils';

// Fixture for getMedianStringLength. The lengths are 3, 15, 9, 1, 1 and 21,
// which sort to [1, 1, 3, 9, 15, 21]; the element at index floor(6 / 2) = 3
// is 9, the value the test below expects.
const strings: string[] = [
'foo',
'foofoofoofoofoo',
'foofoofoo',
'f',
'f',
'foofoofoofoofoofoofoo',
];
// Empty fixture: with no elements the helper is expected to return 0.
const noStrings: string[] = [];

describe('ML - string utils', () => {
describe('renderTemplate', () => {
Expand All @@ -24,4 +34,16 @@ describe('ML - string utils', () => {
expect(result).toBe('string with 1 replacement, and a 2nd one.');
});
});

describe('getMedianStringLength', () => {
  // Six strings: the helper returns the upper of the two middle lengths.
  test('test median for string array', () => {
    expect(getMedianStringLength(strings)).toBe(9);
  });

  // No strings at all: there is no middle element, so the helper yields 0.
  test('test median for no strings', () => {
    expect(getMedianStringLength(noStrings)).toBe(0);
  });
});
});
5 changes: 5 additions & 0 deletions x-pack/legacy/plugins/ml/common/util/string_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,8 @@ export function renderTemplate(str: string, data?: Record<string, string>): stri

return str;
}

/**
 * Returns the median length of the supplied strings.
 *
 * The lengths are sorted numerically and the element at index
 * `floor(count / 2)` is returned, so for an even number of strings this is the
 * upper of the two middle lengths rather than their average. The input array
 * is not modified.
 *
 * @param strings strings whose lengths are examined
 * @returns the median length, or 0 when `strings` is empty
 */
export function getMedianStringLength(strings: string[]) {
  const lengths: number[] = [];
  for (const str of strings) {
    lengths.push(str.length);
  }
  lengths.sort((x, y) => x - y);

  const middle = Math.floor(lengths.length / 2);
  // An empty input leaves `lengths[middle]` undefined; fall back to 0.
  return lengths[middle] || 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,31 @@ import {
CREATED_BY_LABEL,
DEFAULT_BUCKET_SPAN,
DEFAULT_RARE_BUCKET_SPAN,
CATEGORY_EXAMPLES_VALIDATION_STATUS,
} from '../../../../../../common/constants/new_job';
import { ML_JOB_AGGREGATION } from '../../../../../../common/constants/aggregation_types';
import {
CategorizationAnalyzer,
CategoryFieldExample,
FieldExampleCheck,
} from '../../../../../../common/types/categories';
import { getRichDetectors } from './util/general';
import { CategorizationExamplesLoader, CategoryExample } from '../results_loader';
import { CategorizationAnalyzer, getNewJobDefaults } from '../../../../services/ml_server_info';

type CategorizationAnalyzerType = CategorizationAnalyzer | null;
import { CategorizationExamplesLoader } from '../results_loader';
import { getNewJobDefaults } from '../../../../services/ml_server_info';

export class CategorizationJobCreator extends JobCreator {
protected _type: JOB_TYPE = JOB_TYPE.CATEGORIZATION;
private _createCountDetector: () => void = () => {};
private _createRareDetector: () => void = () => {};
private _examplesLoader: CategorizationExamplesLoader;
private _categoryFieldExamples: CategoryExample[] = [];
private _categoryFieldValid: number = 0;
private _categoryFieldExamples: CategoryFieldExample[] = [];
private _validationChecks: FieldExampleCheck[] = [];
private _overallValidStatus: CATEGORY_EXAMPLES_VALIDATION_STATUS =
CATEGORY_EXAMPLES_VALIDATION_STATUS.INVALID;
private _detectorType: ML_JOB_AGGREGATION.COUNT | ML_JOB_AGGREGATION.RARE =
ML_JOB_AGGREGATION.COUNT;
private _categorizationAnalyzer: CategorizationAnalyzerType = null;
private _defaultCategorizationAnalyzer: CategorizationAnalyzerType;
private _categorizationAnalyzer: CategorizationAnalyzer = {};
private _defaultCategorizationAnalyzer: CategorizationAnalyzer;

constructor(
indexPattern: IndexPattern,
Expand All @@ -46,7 +52,7 @@ export class CategorizationJobCreator extends JobCreator {
this._examplesLoader = new CategorizationExamplesLoader(this, indexPattern, query);

const { anomaly_detectors: anomalyDetectors } = getNewJobDefaults();
this._defaultCategorizationAnalyzer = anomalyDetectors.categorization_analyzer || null;
this._defaultCategorizationAnalyzer = anomalyDetectors.categorization_analyzer || {};
}

public setDefaultDetectorProperties(
Expand Down Expand Up @@ -93,7 +99,7 @@ export class CategorizationJobCreator extends JobCreator {
} else {
delete this._job_config.analysis_config.categorization_field_name;
this._categoryFieldExamples = [];
this._categoryFieldValid = 0;
this._validationChecks = [];
}
}

Expand All @@ -102,31 +108,38 @@ export class CategorizationJobCreator extends JobCreator {
}

public async loadCategorizationFieldExamples() {
const { valid, examples, sampleSize } = await this._examplesLoader.loadExamples();
const {
examples,
sampleSize,
overallValidStatus,
validationChecks,
} = await this._examplesLoader.loadExamples();
this._categoryFieldExamples = examples;
this._categoryFieldValid = valid;
return { valid, examples, sampleSize };
this._validationChecks = validationChecks;
this._overallValidStatus = overallValidStatus;
return { examples, sampleSize, overallValidStatus, validationChecks };
}

/**
 * Examples stored by the most recent loadCategorizationFieldExamples() call.
 * Starts as an empty array and is reset to one when the categorization field
 * is cleared.
 */
public get categoryFieldExamples() {
return this._categoryFieldExamples;
}

public get categoryFieldValid() {
return this._categoryFieldValid;
/**
 * Individual validation check results stored by the most recent
 * loadCategorizationFieldExamples() call. Starts as an empty array and is
 * reset to one when the categorization field is cleared.
 */
public get validationChecks() {
return this._validationChecks;
}

/**
 * Overall validation status stored by the most recent
 * loadCategorizationFieldExamples() call; INVALID until examples have been
 * loaded.
 *
 * NOTE(review): the branch that clears the categorization field resets the
 * stored examples and validation checks but does not appear to reset this
 * status - confirm whether a stale status can be observed after clearing.
 */
public get overallValidStatus() {
return this._overallValidStatus;
}

/**
 * Detector aggregation currently selected for this job: either
 * ML_JOB_AGGREGATION.COUNT (the initial value) or ML_JOB_AGGREGATION.RARE.
 */
public get selectedDetectorType() {
return this._detectorType;
}

public set categorizationAnalyzer(analyzer: CategorizationAnalyzerType) {
public set categorizationAnalyzer(analyzer: CategorizationAnalyzer) {
this._categorizationAnalyzer = analyzer;

if (
analyzer === null ||
isEqual(this._categorizationAnalyzer, this._defaultCategorizationAnalyzer)
) {
if (isEqual(this._categorizationAnalyzer, this._defaultCategorizationAnalyzer)) {
delete this._job_config.analysis_config.categorization_analyzer;
} else {
this._job_config.analysis_config.categorization_analyzer = analyzer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import { JobCreator, JobCreatorType, isCategorizationJobCreator } from '../job_c
import { populateValidationMessages, checkForExistingJobAndGroupIds } from './util';
import { ExistingJobsAndGroups } from '../../../../services/job_service';
import { cardinalityValidator, CardinalityValidatorResult } from './validators';
import { CATEGORY_EXAMPLES_ERROR_LIMIT } from '../../../../../../common/constants/new_job';
import { CATEGORY_EXAMPLES_VALIDATION_STATUS } from '../../../../../../common/constants/new_job';

// delay start of validation to allow the user to make changes
// e.g. if they are typing in a new value, try not to validate
Expand Down Expand Up @@ -207,7 +207,7 @@ export class JobValidator {
private _runAdvancedValidation() {
if (isCategorizationJobCreator(this._jobCreator)) {
this._advancedValidations.categorizationFieldValid.valid =
this._jobCreator.categoryFieldValid > CATEGORY_EXAMPLES_ERROR_LIMIT;
this._jobCreator.overallValidStatus !== CATEGORY_EXAMPLES_VALIDATION_STATUS.INVALID;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,12 @@

import { IndexPattern } from '../../../../../../../../../../src/plugins/data/public';
import { IndexPatternTitle } from '../../../../../../common/types/kibana';
import { Token } from '../../../../../../common/types/categories';
import { CategorizationJobCreator } from '../job_creator';
import { ml } from '../../../../services/ml_api_service';
import { NUMBER_OF_CATEGORY_EXAMPLES } from '../../../../../../common/constants/new_job';

export interface CategoryExample {
text: string;
tokens: Token[];
}
import {
NUMBER_OF_CATEGORY_EXAMPLES,
CATEGORY_EXAMPLES_VALIDATION_STATUS,
} from '../../../../../../common/constants/new_job';

export class CategorizationExamplesLoader {
private _jobCreator: CategorizationJobCreator;
Expand All @@ -36,20 +33,22 @@ export class CategorizationExamplesLoader {
const analyzer = this._jobCreator.categorizationAnalyzer;
const categorizationFieldName = this._jobCreator.categorizationFieldName;
if (categorizationFieldName === null) {
return { valid: 0, examples: [], sampleSize: 0 };
return {
examples: [],
sampleSize: 0,
overallValidStatus: CATEGORY_EXAMPLES_VALIDATION_STATUS.INVALID,
validationChecks: [],
};
}

const start = Math.floor(
this._jobCreator.start + (this._jobCreator.end - this._jobCreator.start) / 2
);
const resp = await ml.jobs.categorizationFieldExamples(
this._indexPatternTitle,
this._query,
NUMBER_OF_CATEGORY_EXAMPLES,
categorizationFieldName,
this._timeFieldName,
start,
0,
this._jobCreator.start,
this._jobCreator.end,
analyzer
);
return resp;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
*/

export { ResultsLoader, Results, ModelItem, Anomaly } from './results_loader';
export { CategorizationExamplesLoader, CategoryExample } from './categorization_examples_loader';
export { CategorizationExamplesLoader } from './categorization_examples_loader';
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,24 @@ import { EuiCallOut, EuiSpacer, EuiCallOutProps } from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { FormattedMessage } from '@kbn/i18n/react';

import { CategorizationAnalyzer } from '../../../../../../../services/ml_server_info';
import { EditCategorizationAnalyzerFlyout } from '../../../common/edit_categorization_analyzer_flyout';
import {
CATEGORY_EXAMPLES_ERROR_LIMIT,
CATEGORY_EXAMPLES_WARNING_LIMIT,
} from '../../../../../../../../../common/constants/new_job';

type CategorizationAnalyzerType = CategorizationAnalyzer | null;
CategorizationAnalyzer,
FieldExampleCheck,
} from '../../../../../../../../../common/types/categories';
import { EditCategorizationAnalyzerFlyout } from '../../../common/edit_categorization_analyzer_flyout';
import { CATEGORY_EXAMPLES_VALIDATION_STATUS } from '../../../../../../../../../common/constants/new_job';

interface Props {
examplesValid: number;
sampleSize: number;
categorizationAnalyzer: CategorizationAnalyzerType;
validationChecks: FieldExampleCheck[];
overallValidStatus: CATEGORY_EXAMPLES_VALIDATION_STATUS;
categorizationAnalyzer: CategorizationAnalyzer;
}

export const ExamplesValidCallout: FC<Props> = ({
examplesValid,
overallValidStatus,
validationChecks,
categorizationAnalyzer,
sampleSize,
}) => {
const percentageText = <PercentageText examplesValid={examplesValid} sampleSize={sampleSize} />;
const analyzerUsed = <AnalyzerUsed categorizationAnalyzer={categorizationAnalyzer} />;

let color: EuiCallOutProps['color'] = 'success';
Expand All @@ -40,15 +37,15 @@ export const ExamplesValidCallout: FC<Props> = ({
}
);

if (examplesValid < CATEGORY_EXAMPLES_ERROR_LIMIT) {
if (overallValidStatus === CATEGORY_EXAMPLES_VALIDATION_STATUS.INVALID) {
color = 'danger';
title = i18n.translate(
'xpack.ml.newJob.wizard.pickFieldsStep.categorizationFieldCalloutTitle.invalid',
{
defaultMessage: 'Selected category field is invalid',
}
);
} else if (examplesValid < CATEGORY_EXAMPLES_WARNING_LIMIT) {
} else if (overallValidStatus === CATEGORY_EXAMPLES_VALIDATION_STATUS.PARTIALLY_VALID) {
color = 'warning';
title = i18n.translate(
'xpack.ml.newJob.wizard.pickFieldsStep.categorizationFieldCalloutTitle.possiblyInvalid',
Expand All @@ -60,45 +57,24 @@ export const ExamplesValidCallout: FC<Props> = ({

return (
<EuiCallOut color={color} title={title}>
{percentageText}
{validationChecks.map((v, i) => (
<div key={i}>{v.message}</div>
))}
<EuiSpacer size="s" />
{analyzerUsed}
</EuiCallOut>
);
};

/**
 * Renders the summary line stating how many field values were analyzed and
 * what percentage of them contain valid tokens.
 *
 * `examplesValid` is multiplied by 100 and rounded down before display;
 * `sampleSize` is shown as the number of analyzed values and drives the
 * singular/plural wording of the message.
 */
const PercentageText: FC<{ examplesValid: number; sampleSize: number }> = props => {
  const { examplesValid, sampleSize } = props;
  const validPercentage = Math.floor(examplesValid * 100);

  return (
    <div>
      <FormattedMessage
        id="xpack.ml.newJob.wizard.pickFieldsStep.categorizationFieldPercentage"
        defaultMessage="{number} field {number, plural, zero {value} one {value} other {values}} analyzed, {percentage}% contain valid tokens."
        values={{
          number: sampleSize,
          percentage: validPercentage,
        }}
      />
    </div>
  );
};

const AnalyzerUsed: FC<{ categorizationAnalyzer: CategorizationAnalyzerType }> = ({
const AnalyzerUsed: FC<{ categorizationAnalyzer: CategorizationAnalyzer }> = ({
categorizationAnalyzer,
}) => {
let analyzer = '';
if (typeof categorizationAnalyzer === null) {
return null;
}

if (typeof categorizationAnalyzer === 'string') {
analyzer = categorizationAnalyzer;
} else {
if (categorizationAnalyzer?.tokenizer !== undefined) {
analyzer = categorizationAnalyzer?.tokenizer!;
} else if (categorizationAnalyzer?.analyzer !== undefined) {
analyzer = categorizationAnalyzer?.analyzer!;
}
if (categorizationAnalyzer?.tokenizer !== undefined) {
analyzer = categorizationAnalyzer.tokenizer;
} else if (categorizationAnalyzer?.analyzer !== undefined) {
analyzer = categorizationAnalyzer.analyzer;
}

return (
Expand Down
Loading

0 comments on commit 15e584f

Please sign in to comment.