diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 5d8c10879b6e7..5698e3ea2313c 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -407,7 +407,7 @@ public List getPreBuiltAnalyzerProviderFactorie public List getPreConfiguredCharFilters() { List filters = new ArrayList<>(); filters.add(PreConfiguredCharFilter.singleton("html_strip", false, HTMLStripCharFilter::new)); - filters.add(PreConfiguredCharFilter.singletonWithVersion("htmlStrip", false, (reader, version) -> { + filters.add(PreConfiguredCharFilter.elasticsearchVersion("htmlStrip", false, (reader, version) -> { if (version.onOrAfter(org.elasticsearch.Version.V_6_3_0)) { deprecationLogger.deprecatedAndMaybeLog("htmlStrip_deprecation", "The [htmpStrip] char filter name is deprecated and will be removed in a future version. " @@ -434,7 +434,7 @@ public List getPreConfiguredTokenFilters() { input -> new CommonGramsFilter(input, CharArraySet.EMPTY_SET))); filters.add(PreConfiguredTokenFilter.singleton("czech_stem", false, CzechStemFilter::new)); filters.add(PreConfiguredTokenFilter.singleton("decimal_digit", true, DecimalDigitFilter::new)); - filters.add(PreConfiguredTokenFilter.singletonWithVersion("delimited_payload_filter", false, (input, version) -> { + filters.add(PreConfiguredTokenFilter.elasticsearchVersion("delimited_payload_filter", false, (input, version) -> { if (version.onOrAfter(Version.V_7_0_0)) { throw new IllegalArgumentException( "[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead"); @@ -453,7 +453,7 @@ public List getPreConfiguredTokenFilters() { filters.add(PreConfiguredTokenFilter.singleton("dutch_stem", false, input -> new SnowballFilter(input, new DutchStemmer()))); filters.add(PreConfiguredTokenFilter.singleton("edge_ngram", false, false, input -> new EdgeNGramTokenFilter(input, 1))); - filters.add(PreConfiguredTokenFilter.singletonWithVersion("edgeNGram", false, false, (reader, version) -> { + filters.add(PreConfiguredTokenFilter.elasticsearchVersion("edgeNGram", false, false, (reader, version) -> { if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) { throw new IllegalArgumentException( "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " @@ -481,7 +481,7 @@ public List getPreConfiguredTokenFilters() { LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT, LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS))); filters.add(PreConfiguredTokenFilter.singleton("ngram", false, false, reader -> new NGramTokenFilter(reader, 1, 2, false))); - filters.add(PreConfiguredTokenFilter.singletonWithVersion("nGram", false, false, (reader, version) -> { + filters.add(PreConfiguredTokenFilter.elasticsearchVersion("nGram", false, false, (reader, version) -> { if (version.onOrAfter(org.elasticsearch.Version.V_7_0_0)) { throw new IllegalArgumentException("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. " + "Please change the filter name to [ngram] instead."); @@ -527,7 +527,7 @@ public List getPreConfiguredTokenFilters() { | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null))); - filters.add(PreConfiguredTokenFilter.singletonWithVersion("word_delimiter_graph", false, false, (input, version) -> { + filters.add(PreConfiguredTokenFilter.elasticsearchVersion("word_delimiter_graph", false, false, (input, version) -> { boolean adjustOffsets = version.onOrAfter(Version.V_7_3_0); return new WordDelimiterGraphFilter(input, adjustOffsets, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, WordDelimiterGraphFilter.GENERATE_WORD_PARTS diff --git a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java index 5776edd69fc83..2f8f013ec5bdd 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java @@ -57,25 +57,6 @@ public static PreConfiguredTokenFilter singleton(String name, boolean useFilterF (tokenStream, version) -> create.apply(tokenStream)); } - /** - * Create a pre-configured token filter that may vary based on the Elasticsearch version. - */ - public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries, - BiFunction create) { - return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, true, CachingStrategy.ONE, - (tokenStream, version) -> create.apply(tokenStream, version)); - } - - /** - * Create a pre-configured token filter that may vary based on the Elasticsearch version. - */ - public static PreConfiguredTokenFilter singletonWithVersion(String name, boolean useFilterForMultitermQueries, - boolean useFilterForParsingSynonyms, - BiFunction create) { - return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, useFilterForParsingSynonyms, CachingStrategy.ONE, - (tokenStream, version) -> create.apply(tokenStream, version)); - } - /** * Create a pre-configured token filter that may vary based on the Lucene version. */ @@ -93,6 +74,16 @@ public static PreConfiguredTokenFilter elasticsearchVersion(String name, boolean return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, true, CachingStrategy.ELASTICSEARCH, create); } + /** + * Create a pre-configured token filter that may vary based on the Elasticsearch version. + */ + public static PreConfiguredTokenFilter elasticsearchVersion(String name, boolean useFilterForMultitermQueries, + boolean useFilterForParsingSynonyms, + BiFunction create) { + return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, useFilterForParsingSynonyms, + CachingStrategy.ELASTICSEARCH, create); + } + private final boolean useFilterForMultitermQueries; private final boolean allowForSynonymParsing; private final BiFunction create; diff --git a/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index 60a2b1640ed5b..2b856579ff15b 100644 --- a/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -181,7 +181,7 @@ static Map setupPreConfiguredTokenFilters(List preConfiguredTokenFilters.register("lowercase", PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new)); // Add "standard" for old indices (bwc) preConfiguredTokenFilters.register( "standard", - PreConfiguredTokenFilter.singletonWithVersion("standard", true, (reader, version) -> { + PreConfiguredTokenFilter.elasticsearchVersion("standard", true, (reader, version) -> { if (version.before(Version.V_7_0_0)) { deprecationLogger.deprecatedAndMaybeLog("standard_deprecation", "The [standard] token filter is deprecated and will be removed in a future version."); diff --git a/server/src/test/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilterTests.java b/server/src/test/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilterTests.java new file mode 100644 index 0000000000000..8c4c62487c21c --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilterTests.java @@ -0,0 +1,130 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.TokenFilter; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.TestEnvironment; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.VersionUtils; + +import java.io.IOException; + +public class PreConfiguredTokenFilterTests extends ESTestCase { + + private final Settings emptyNodeSettings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) + .build(); + + public void testCachingWithSingleton() throws IOException { + PreConfiguredTokenFilter pctf = + PreConfiguredTokenFilter.singleton("singleton", randomBoolean(), + (tokenStream) -> new TokenFilter(tokenStream) { + @Override + public boolean incrementToken() { + return false; + } + }); + + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY); + + Version version1 = VersionUtils.randomVersion(random()); + Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1) + .build(); + TokenFilterFactory tff_v1_1 = + pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings1); + TokenFilterFactory tff_v1_2 = + pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings1); + assertSame(tff_v1_1, tff_v1_2); + + Version version2 = randomValueOtherThan(version1, () -> randomFrom(VersionUtils.allVersions())); + Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2) + .build(); + + TokenFilterFactory tff_v2 = + pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "singleton", settings2); + assertSame(tff_v1_1, tff_v2); + } + + public void testCachingWithElasticsearchVersion() throws IOException { + PreConfiguredTokenFilter pctf = + PreConfiguredTokenFilter.elasticsearchVersion("elasticsearch_version", randomBoolean(), + (tokenStream, esVersion) -> new TokenFilter(tokenStream) { + @Override + public boolean incrementToken() { + return false; + } + }); + + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY); + + Version version1 = VersionUtils.randomVersion(random()); + Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1) + .build(); + TokenFilterFactory tff_v1_1 = + pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings1); + TokenFilterFactory tff_v1_2 = + pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings1); + assertSame(tff_v1_1, tff_v1_2); + + Version version2 = randomValueOtherThan(version1, () -> randomFrom(VersionUtils.allVersions())); + Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2) + .build(); + + TokenFilterFactory tff_v2 = + pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "elasticsearch_version", settings2); + assertNotSame(tff_v1_1, tff_v2); + } + + public void testCachingWithLuceneVersion() throws IOException { + PreConfiguredTokenFilter pctf = + PreConfiguredTokenFilter.luceneVersion("lucene_version", randomBoolean(), + (tokenStream, luceneVersion) -> new TokenFilter(tokenStream) { + @Override + public boolean incrementToken() { + return false; + } + }); + + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.EMPTY); + + Version version1 = Version.CURRENT; + Settings settings1 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version1) + .build(); + TokenFilterFactory tff_v1_1 = + pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings1); + TokenFilterFactory tff_v1_2 = + pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings1); + assertSame(tff_v1_1, tff_v1_2); + + byte major = VersionUtils.getFirstVersion().major; + Version version2 = Version.fromString(major - 1 + ".0.0"); + Settings settings2 = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version2) + .build(); + + TokenFilterFactory tff_v2 = + pctf.get(indexSettings, TestEnvironment.newEnvironment(emptyNodeSettings), "lucene_version", settings2); + assertNotSame(tff_v1_1, tff_v2); + } +}