[Enterprise Search] Create API Index API (elastic#135877)

* Getting started with an index create API
* Added default mappings and filter settings
* Added analysis settings, fixed type errors, added simple tests.
* [CI] Auto-commit changed files from 'node scripts/eslint --no-cache --fix'
* PascalCase it is.
* Bubble up the ability to use the default language.
* [CI] Auto-commit changed files from 'node scripts/precommit_hook.js --ref HEAD~1..HEAD --fix'
* Clean up types
* [CI] Auto-commit changed files from 'node scripts/precommit_hook.js --ref HEAD~1..HEAD --fix'
* Fix jest specs

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
justinkambic · Jul 13, 2022 · ea7a3b7 · ea7a3b7
1 parent 0081b92
commit ea7a3b7
Show file tree

Hide file tree

Showing 5 changed files with 851 additions and 0 deletions.
diff --git a/x-pack/plugins/enterprise_search/server/lib/indices/create_index.test.ts b/x-pack/plugins/enterprise_search/server/lib/indices/create_index.test.ts
@@ -0,0 +1,188 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { elasticsearchServiceMock } from '@kbn/core/server/mocks';
+
+import { createApiIndex } from './create_index';
+
+/**
+ * Unit test for `createApiIndex`: checks the resolved value and the exact
+ * request handed to `indices.create` when called with language `'en'`.
+ */
+describe('createApiIndex lib function', () => {
+  const mockClient = elasticsearchServiceMock.createScopedClusterClient();
+
+  // Value the test expects `createApiIndex` to resolve with.
+  const expectedResponse = {
+    body: {},
+    headers: {
+      'x-elastic-product': 'Elasticsearch',
+    },
+    meta: {},
+    statusCode: 200,
+    warnings: [],
+  };
+
+  // Request expected for index 'index_name' created with language 'en'.
+  const expectedCreateRequest = {
+    body: {
+      mappings: {
+        dynamic: true,
+        dynamic_templates: [
+          {
+            all_text_fields: {
+              mapping: {
+                analyzer: 'iq_text_base',
+                fields: {
+                  delimiter: {
+                    analyzer: 'iq_text_delimiter',
+                    index_options: 'freqs',
+                    type: 'text',
+                  },
+                  enum: {
+                    ignore_above: 2048,
+                    type: 'keyword',
+                  },
+                  joined: {
+                    analyzer: 'i_text_bigram',
+                    index_options: 'freqs',
+                    search_analyzer: 'q_text_bigram',
+                    type: 'text',
+                  },
+                  prefix: {
+                    analyzer: 'i_prefix',
+                    index_options: 'docs',
+                    search_analyzer: 'q_prefix',
+                    type: 'text',
+                  },
+                  stem: {
+                    analyzer: 'iq_text_stem',
+                    type: 'text',
+                  },
+                },
+              },
+              match_mapping_type: 'string',
+            },
+          },
+        ],
+      },
+      settings: {
+        analysis: {
+          analyzer: {
+            i_prefix: {
+              filter: ['cjk_width', 'lowercase', 'asciifolding', 'front_ngram'],
+              tokenizer: 'standard',
+              type: 'custom',
+            },
+            i_text_bigram: {
+              filter: [
+                'cjk_width',
+                'lowercase',
+                'asciifolding',
+                'en-stem-filter',
+                'bigram_joiner',
+                'bigram_max_size',
+              ],
+              tokenizer: 'standard',
+              type: 'custom',
+            },
+            iq_text_base: {
+              filter: ['cjk_width', 'lowercase', 'asciifolding', 'en-stop-words-filter'],
+              tokenizer: 'standard',
+              type: 'custom',
+            },
+            iq_text_delimiter: {
+              filter: [
+                'delimiter',
+                'cjk_width',
+                'lowercase',
+                'asciifolding',
+                'en-stop-words-filter',
+                'en-stem-filter',
+              ],
+              tokenizer: 'whitespace',
+              type: 'custom',
+            },
+            iq_text_stem: {
+              filter: [
+                'cjk_width',
+                'lowercase',
+                'asciifolding',
+                'en-stop-words-filter',
+                'en-stem-filter',
+              ],
+              tokenizer: 'standard',
+              type: 'custom',
+            },
+            q_prefix: {
+              filter: ['cjk_width', 'lowercase', 'asciifolding'],
+              tokenizer: 'standard',
+              type: 'custom',
+            },
+            q_text_bigram: {
+              filter: [
+                'cjk_width',
+                'lowercase',
+                'asciifolding',
+                'en-stem-filter',
+                'bigram_joiner_unigrams',
+                'bigram_max_size',
+              ],
+              tokenizer: 'standard',
+              type: 'custom',
+            },
+          },
+          filter: {
+            bigram_joiner: {
+              max_shingle_size: 2,
+              output_unigrams: false,
+              token_separator: '',
+              type: 'shingle',
+            },
+            bigram_joiner_unigrams: {
+              max_shingle_size: 2,
+              output_unigrams: true,
+              token_separator: '',
+              type: 'shingle',
+            },
+            bigram_max_size: {
+              max: 16,
+              min: 0,
+              type: 'length',
+            },
+            delimiter: {
+              catenate_all: true,
+              catenate_numbers: true,
+              catenate_words: true,
+              generate_number_parts: true,
+              generate_word_parts: true,
+              preserve_original: false,
+              split_on_case_change: true,
+              split_on_numerics: true,
+              stem_english_possessive: true,
+              type: 'word_delimiter_graph',
+            },
+            'en-stem-filter': {
+              name: 'light_english',
+              language: 'light_english',
+              type: 'stemmer',
+            },
+            'en-stop-words-filter': {
+              stopwords: '_english_',
+              type: 'stop',
+            },
+            front_ngram: {
+              max_gram: 12,
+              min_gram: 1,
+              type: 'edge_ngram',
+            },
+          },
+        },
+      },
+    },
+    index: 'index_name',
+  };
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+
+  it('successfully creates an index', async () => {
+    const response = await createApiIndex(mockClient, 'index_name', 'en');
+
+    expect(response).toEqual(expectedResponse);
+    expect(mockClient.asCurrentUser.indices.create).toHaveBeenCalledWith(expectedCreateRequest);
+  });
+});
diff --git a/x-pack/plugins/enterprise_search/server/lib/indices/create_index.ts b/x-pack/plugins/enterprise_search/server/lib/indices/create_index.ts
@@ -0,0 +1,76 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { MappingKeywordProperty, MappingTextProperty } from '@elastic/elasticsearch/lib/api/types';
+import { IScopedClusterClient } from '@kbn/core/server';
+
+import { textAnalysisSettings } from './text_analysis';
+
+/** Text sub-field indexed with the `i_prefix` analyzer and searched with `q_prefix`. */
+const prefixMapping: MappingTextProperty = {
+  type: 'text',
+  analyzer: 'i_prefix',
+  search_analyzer: 'q_prefix',
+  index_options: 'docs',
+};
+
+/** Text sub-field analyzed with `iq_text_delimiter`, storing term frequencies only. */
+const delimiterMapping: MappingTextProperty = {
+  type: 'text',
+  analyzer: 'iq_text_delimiter',
+  index_options: 'freqs',
+};
+
+/** Text sub-field indexed with the `i_text_bigram` analyzer and searched with `q_text_bigram`. */
+const joinedMapping: MappingTextProperty = {
+  type: 'text',
+  analyzer: 'i_text_bigram',
+  search_analyzer: 'q_text_bigram',
+  index_options: 'freqs',
+};
+
+/** Keyword sub-field with `ignore_above` set to 2048. */
+const enumMapping: MappingKeywordProperty = {
+  type: 'keyword',
+  ignore_above: 2048,
+};
+
+/** Text sub-field analyzed with the `iq_text_stem` analyzer. */
+const stemMapping: MappingTextProperty = {
+  type: 'text',
+  analyzer: 'iq_text_stem',
+};
+
+/**
+ * Dynamic template with `match_mapping_type: 'string'`: the base field uses the
+ * `iq_text_base` analyzer and carries the prefix, delimiter, joined, enum and
+ * stem sub-fields.
+ */
+const allTextFieldsTemplate = {
+  match_mapping_type: 'string',
+  mapping: {
+    analyzer: 'iq_text_base',
+    fields: {
+      prefix: prefixMapping,
+      delimiter: delimiterMapping,
+      joined: joinedMapping,
+      enum: enumMapping,
+      stem: stemMapping,
+    },
+  },
+};
+
+/** Mappings sent by `createApiIndex`: dynamic mapping enabled plus the single text template. */
+const defaultMappings = {
+  dynamic: true,
+  dynamic_templates: [{ all_text_fields: allTextFieldsTemplate }],
+};
+
+/**
+ * Creates the index `indexName` through the current user's client, sending the
+ * default mappings together with the analysis settings built for `language`.
+ *
+ * @param client scoped cluster client; the request is issued via `asCurrentUser`
+ * @param indexName name of the index to create
+ * @param language forwarded unchanged to `textAnalysisSettings`; may be `undefined`
+ * @returns the result of the awaited `indices.create` call
+ */
+export const createApiIndex = async (
+  client: IScopedClusterClient,
+  indexName: string,
+  language: string | undefined
+) => {
+  const esClient = client.asCurrentUser;
+  const settings = textAnalysisSettings(language);
+
+  return await esClient.indices.create({
+    index: indexName,
+    body: { mappings: defaultMappings, settings },
+  });
+};