Skip to content

Commit

Permalink
[Enterprise Search] Create API Index API (elastic#135877)
Browse files Browse the repository at this point in the history
* Getting started with an index create API

* Added default mappings and filter settings

* Added analysis settings, fixed type errors, added simple tests.

* [CI] Auto-commit changed files from 'node scripts/eslint --no-cache --fix'

* PascalCase it is.

* Bubble up the ability to use the default language.

* [CI] Auto-commit changed files from 'node scripts/precommit_hook.js --ref HEAD~1..HEAD --fix'

* Clean up types

* [CI] Auto-commit changed files from 'node scripts/precommit_hook.js --ref HEAD~1..HEAD --fix'

* Fix jest specs

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
  • Loading branch information
2 people authored and justinkambic committed Jul 13, 2022
1 parent 0081b92 commit ea7a3b7
Show file tree
Hide file tree
Showing 5 changed files with 851 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { elasticsearchServiceMock } from '@kbn/core/server/mocks';

import { createApiIndex } from './create_index';

describe('createApiIndex lib function', () => {
  const mockClient = elasticsearchServiceMock.createScopedClusterClient();

  // Filters shared by every analyzer asserted below, in the order they are expected.
  const sharedFilters = ['cjk_width', 'lowercase', 'asciifolding'];

  // Builds the expected shape of a `custom` analyzer definition.
  const customAnalyzer = (tokenizer: string, filter: string[]) => ({
    filter,
    tokenizer,
    type: 'custom',
  });

  // Value the promise returned by createApiIndex is expected to resolve with.
  const expectedResponse = {
    body: {},
    headers: {
      'x-elastic-product': 'Elasticsearch',
    },
    meta: {},
    statusCode: 200,
    warnings: [],
  };

  // Sub-fields expected under the `all_text_fields` dynamic template.
  const expectedTextSubFields = {
    delimiter: {
      analyzer: 'iq_text_delimiter',
      index_options: 'freqs',
      type: 'text',
    },
    enum: {
      ignore_above: 2048,
      type: 'keyword',
    },
    joined: {
      analyzer: 'i_text_bigram',
      index_options: 'freqs',
      search_analyzer: 'q_text_bigram',
      type: 'text',
    },
    prefix: {
      analyzer: 'i_prefix',
      index_options: 'docs',
      search_analyzer: 'q_prefix',
      type: 'text',
    },
    stem: {
      analyzer: 'iq_text_stem',
      type: 'text',
    },
  };

  const expectedMappings = {
    dynamic: true,
    dynamic_templates: [
      {
        all_text_fields: {
          mapping: {
            analyzer: 'iq_text_base',
            fields: expectedTextSubFields,
          },
          match_mapping_type: 'string',
        },
      },
    ],
  };

  // Analyzer definitions expected when the language argument is 'en'.
  const expectedAnalyzers = {
    i_prefix: customAnalyzer('standard', [...sharedFilters, 'front_ngram']),
    i_text_bigram: customAnalyzer('standard', [
      ...sharedFilters,
      'en-stem-filter',
      'bigram_joiner',
      'bigram_max_size',
    ]),
    iq_text_base: customAnalyzer('standard', [...sharedFilters, 'en-stop-words-filter']),
    iq_text_delimiter: customAnalyzer('whitespace', [
      'delimiter',
      ...sharedFilters,
      'en-stop-words-filter',
      'en-stem-filter',
    ]),
    iq_text_stem: customAnalyzer('standard', [
      ...sharedFilters,
      'en-stop-words-filter',
      'en-stem-filter',
    ]),
    q_prefix: customAnalyzer('standard', [...sharedFilters]),
    q_text_bigram: customAnalyzer('standard', [
      ...sharedFilters,
      'en-stem-filter',
      'bigram_joiner_unigrams',
      'bigram_max_size',
    ]),
  };

  // Token filter definitions expected when the language argument is 'en'.
  const expectedFilters = {
    bigram_joiner: {
      max_shingle_size: 2,
      output_unigrams: false,
      token_separator: '',
      type: 'shingle',
    },
    bigram_joiner_unigrams: {
      max_shingle_size: 2,
      output_unigrams: true,
      token_separator: '',
      type: 'shingle',
    },
    bigram_max_size: {
      max: 16,
      min: 0,
      type: 'length',
    },
    delimiter: {
      catenate_all: true,
      catenate_numbers: true,
      catenate_words: true,
      generate_number_parts: true,
      generate_word_parts: true,
      preserve_original: false,
      split_on_case_change: true,
      split_on_numerics: true,
      stem_english_possessive: true,
      type: 'word_delimiter_graph',
    },
    'en-stem-filter': {
      name: 'light_english',
      language: 'light_english',
      type: 'stemmer',
    },
    'en-stop-words-filter': {
      stopwords: '_english_',
      type: 'stop',
    },
    front_ngram: {
      max_gram: 12,
      min_gram: 1,
      type: 'edge_ngram',
    },
  };

  // Full request that indices.create is expected to receive.
  const expectedCreateRequest = {
    body: {
      mappings: expectedMappings,
      settings: {
        analysis: {
          analyzer: expectedAnalyzers,
          filter: expectedFilters,
        },
      },
    },
    index: 'index_name',
  };

  beforeEach(() => {
    jest.clearAllMocks();
  });

  it('successfully creates an index', async () => {
    const pendingCreate = createApiIndex(mockClient, 'index_name', 'en');

    await expect(pendingCreate).resolves.toEqual(expectedResponse);

    const { create: createIndexMock } = mockClient.asCurrentUser.indices;
    expect(createIndexMock).toHaveBeenCalledWith(expectedCreateRequest);
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { MappingKeywordProperty, MappingTextProperty } from '@elastic/elasticsearch/lib/api/types';
import { IScopedClusterClient } from '@kbn/core/server';

import { textAnalysisSettings } from './text_analysis';

/** `prefix` sub-field: indexed with `i_prefix`, searched with `q_prefix`. */
const prefixMapping: MappingTextProperty = {
  search_analyzer: 'q_prefix',
  analyzer: 'i_prefix',
  type: 'text',
  index_options: 'docs',
};

/** `delimiter` sub-field: uses the `iq_text_delimiter` analyzer. */
const delimiterMapping: MappingTextProperty = {
  analyzer: 'iq_text_delimiter',
  type: 'text',
  index_options: 'freqs',
};

/** `joined` sub-field: indexed with `i_text_bigram`, searched with `q_text_bigram`. */
const joinedMapping: MappingTextProperty = {
  search_analyzer: 'q_text_bigram',
  analyzer: 'i_text_bigram',
  type: 'text',
  index_options: 'freqs',
};

/** `enum` sub-field: keyword mapping with `ignore_above` set to 2048. */
const enumMapping: MappingKeywordProperty = {
  ignore_above: 2048,
  type: 'keyword',
};

/** `stem` sub-field: uses the `iq_text_stem` analyzer. */
const stemMapping: MappingTextProperty = {
  analyzer: 'iq_text_stem',
  type: 'text',
};

/** Sub-fields attached to every field matched by the `all_text_fields` template. */
const textSubFields = {
  prefix: prefixMapping,
  delimiter: delimiterMapping,
  joined: joinedMapping,
  enum: enumMapping,
  stem: stemMapping,
};

/** Dynamic template body applied to fields whose `match_mapping_type` is `string`. */
const allTextFieldsTemplate = {
  match_mapping_type: 'string',
  mapping: {
    analyzer: 'iq_text_base',
    fields: textSubFields,
  },
};

/** Mappings sent with every index creation request: dynamic mapping plus one template. */
const defaultMappings = {
  dynamic: true,
  dynamic_templates: [{ all_text_fields: allTextFieldsTemplate }],
};

/**
 * Creates an index through the current user's Elasticsearch client, sending
 * `defaultMappings` as the mappings and the result of
 * `textAnalysisSettings(language)` as the settings.
 *
 * @param client scoped cluster client; only `asCurrentUser` is used.
 * @param indexName name of the index to create.
 * @param language value forwarded unchanged to `textAnalysisSettings`; may be
 *   `undefined` (presumably selecting a default language — confirm in `text_analysis`).
 * @returns the value resolved by `indices.create`.
 */
export const createApiIndex = async (
  client: IScopedClusterClient,
  indexName: string,
  language: string | undefined
) => {
  const { asCurrentUser: esClient } = client;

  const createResponse = await esClient.indices.create({
    index: indexName,
    body: {
      mappings: defaultMappings,
      settings: textAnalysisSettings(language),
    },
  });

  return createResponse;
};
Loading

0 comments on commit ea7a3b7

Please sign in to comment.