From 37610548f88f5975e468dee4cdc7a0e29a5a470e Mon Sep 17 00:00:00 2001 From: Britta Weber Date: Tue, 26 May 2015 18:31:18 +0200 Subject: [PATCH] highlighting: don't fail search request when name of highlighted field contains wildcards When we highlight on fields using wildcards then fields might match that cannot be highlighted by the specified highlighter. The whole search request then failed. Instead, check that the field can be highlighted and ignore the field if it can't. In addition ignore the exception thrown by plain highlighter if a field conatins terms larger than 32766. closes #9881 --- .../highlight/FastVectorHighlighter.java | 7 +- .../search/highlight/HighlightPhase.java | 24 ++-- .../search/highlight/Highlighter.java | 4 + .../search/highlight/PlainHighlighter.java | 15 ++- .../search/highlight/PostingsHighlighter.java | 7 +- .../search/highlight/CustomHighlighter.java | 6 + .../highlight/HighlighterSearchTests.java | 126 ++++++++++++++++-- 7 files changed, 168 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java b/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java index 715dacae39dfc..c997624ff60fc 100644 --- a/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java +++ b/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java @@ -63,7 +63,7 @@ public HighlightField highlight(HighlighterContext highlighterContext) { FetchSubPhase.HitContext hitContext = highlighterContext.hitContext; FieldMapper mapper = highlighterContext.mapper; - if (!(mapper.fieldType().storeTermVectors() && mapper.fieldType().storeTermVectorOffsets() && mapper.fieldType().storeTermVectorPositions())) { + if (canHighlight(mapper) == false) { throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter"); } @@ -177,6 +177,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) { } } + @Override + public boolean canHighlight(FieldMapper fieldMapper) { + return fieldMapper.fieldType().storeTermVectors() && fieldMapper.fieldType().storeTermVectorOffsets() && fieldMapper.fieldType().storeTermVectorPositions(); + } + private class MapperHighlightEntry { public FragListBuilder fragListBuilder; public FragmentsBuilder fragmentsBuilder; diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java index 10afac729ba3a..f005f5be7c342 100644 --- a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java +++ b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java @@ -21,7 +21,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import org.apache.lucene.index.IndexOptions; import org.apache.lucene.search.Query; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; @@ -45,6 +44,8 @@ */ public class HighlightPhase extends AbstractComponent implements FetchSubPhase { + private static final ImmutableList STANDARD_HIGHLIGHTERS_BY_PRECEDENCE = ImmutableList.of("fvh", "postings", "plain"); + private final Highlighters highlighters; @Inject @@ -91,6 +92,7 @@ public void hitExecute(SearchContext context, HitContext hitContext) { } } + boolean fieldNameContainsWildcards = field.field().contains("*"); for (String fieldName : fieldNamesToHighlight) { FieldMapper fieldMapper = getMapperForField(fieldName, context, hitContext); if (fieldMapper == null) { @@ -99,16 +101,14 @@ public void hitExecute(SearchContext context, HitContext hitContext) { String highlighterType = field.fieldOptions().highlighterType(); if (highlighterType == null) { - boolean useFastVectorHighlighter = fieldMapper.fieldType().storeTermVectors() && fieldMapper.fieldType().storeTermVectorOffsets() && fieldMapper.fieldType().storeTermVectorPositions(); - if (useFastVectorHighlighter) { - highlighterType = "fvh"; - } else if (fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { - highlighterType = "postings"; - } else { - highlighterType = "plain"; + for(String highlighterCandidate : STANDARD_HIGHLIGHTERS_BY_PRECEDENCE) { + if (highlighters.get(highlighterCandidate).canHighlight(fieldMapper)) { + highlighterType = highlighterCandidate; + break; + } } + assert highlighterType != null; } - Highlighter highlighter = highlighters.get(highlighterType); if (highlighter == null) { throw new IllegalArgumentException("unknown highlighter type [" + highlighterType + "] for the field [" + fieldName + "]"); @@ -116,13 +116,17 @@ public void hitExecute(SearchContext context, HitContext hitContext) { Query highlightQuery = field.fieldOptions().highlightQuery() == null ? context.parsedQuery().query() : field.fieldOptions().highlightQuery(); HighlighterContext highlighterContext = new HighlighterContext(fieldName, field, fieldMapper, context, hitContext, highlightQuery); + + if ((highlighter.canHighlight(fieldMapper) == false) && fieldNameContainsWildcards) { + // if several fieldnames matched the wildcard then we want to skip those that we cannot highlight + continue; + } HighlightField highlightField = highlighter.highlight(highlighterContext); if (highlightField != null) { highlightFields.put(highlightField.name(), highlightField); } } } - hitContext.hit().highlightFields(highlightFields); } diff --git a/src/main/java/org/elasticsearch/search/highlight/Highlighter.java b/src/main/java/org/elasticsearch/search/highlight/Highlighter.java index 407cdc7b1ae7c..26c3dc0bf2132 100644 --- a/src/main/java/org/elasticsearch/search/highlight/Highlighter.java +++ b/src/main/java/org/elasticsearch/search/highlight/Highlighter.java @@ -18,6 +18,8 @@ */ package org.elasticsearch.search.highlight; +import org.elasticsearch.index.mapper.FieldMapper; + /** * */ @@ -26,4 +28,6 @@ public interface Highlighter { String[] names(); HighlightField highlight(HighlighterContext highlighterContext); + + boolean canHighlight(FieldMapper fieldMapper); } diff --git a/src/main/java/org/elasticsearch/search/highlight/PlainHighlighter.java b/src/main/java/org/elasticsearch/search/highlight/PlainHighlighter.java index 12d7d08fa8f52..460b2df05cd4d 100644 --- a/src/main/java/org/elasticsearch/search/highlight/PlainHighlighter.java +++ b/src/main/java/org/elasticsearch/search/highlight/PlainHighlighter.java @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.search.highlight.*; +import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.CollectionUtil; import org.elasticsearch.common.text.StringText; import org.elasticsearch.common.text.Text; @@ -117,7 +118,14 @@ public HighlightField highlight(HighlighterContext highlighterContext) { } } } catch (Exception e) { - throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); + if (e instanceof BytesRefHash.MaxBytesLengthExceededException) { + // this can happen if for example a field is not_analyzed and ignore_above option is set. + // the field will be ignored when indexing but the huge term is still in the source and + // the plain highlighter will parse the source and try to analyze it. + return null; + } else { + throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e); + } } if (field.fieldOptions().scoreOrdered()) { CollectionUtil.introSort(fragsList, new Comparator() { @@ -164,6 +172,11 @@ public int compare(TextFragment o1, TextFragment o2) { return null; } + @Override + public boolean canHighlight(FieldMapper fieldMapper) { + return true; + } + private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException { try { if (!tokenStream.hasAttribute(OffsetAttribute.class)) { diff --git a/src/main/java/org/elasticsearch/search/highlight/PostingsHighlighter.java b/src/main/java/org/elasticsearch/search/highlight/PostingsHighlighter.java index 0375ff204f796..dcbb810d4ddd3 100644 --- a/src/main/java/org/elasticsearch/search/highlight/PostingsHighlighter.java +++ b/src/main/java/org/elasticsearch/search/highlight/PostingsHighlighter.java @@ -50,7 +50,7 @@ public HighlightField highlight(HighlighterContext highlighterContext) { FieldMapper fieldMapper = highlighterContext.mapper; SearchContextHighlight.Field field = highlighterContext.field; - if (fieldMapper.fieldType().indexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { + if (canHighlight(fieldMapper) == false) { throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with positions and offsets in the postings list to be used with postings highlighter"); } @@ -126,6 +126,11 @@ public int compare(Snippet o1, Snippet o2) { return null; } + @Override + public boolean canHighlight(FieldMapper fieldMapper) { + return fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + } + private static String mergeFieldValues(List fieldValues, char valuesSeparator) { //postings highlighter accepts all values in a single string, as offsets etc. need to match with content //loaded from stored fields, we merge all values using a proper separator diff --git a/src/test/java/org/elasticsearch/search/highlight/CustomHighlighter.java b/src/test/java/org/elasticsearch/search/highlight/CustomHighlighter.java index 2845af198a25d..3a9135cb7314c 100644 --- a/src/test/java/org/elasticsearch/search/highlight/CustomHighlighter.java +++ b/src/test/java/org/elasticsearch/search/highlight/CustomHighlighter.java @@ -21,6 +21,7 @@ import com.google.common.collect.Lists; import org.elasticsearch.common.text.StringText; import org.elasticsearch.common.text.Text; +import org.elasticsearch.index.mapper.FieldMapper; import java.util.List; import java.util.Locale; @@ -68,6 +69,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) { return new HighlightField(highlighterContext.fieldName, responses.toArray(new Text[]{})); } + @Override + public boolean canHighlight(FieldMapper fieldMapper) { + return true; + } + private static class CacheEntry { private int position; private int docId; diff --git a/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java b/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java index b3e723f213ad4..7a0ebf57738d1 100644 --- a/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java +++ b/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java @@ -51,12 +51,124 @@ import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight; import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.elasticsearch.test.hamcrest.RegexMatcher.matches; import static org.hamcrest.Matchers.*; @Slow public class HighlighterSearchTests extends ElasticsearchIntegrationTest { + @Test + public void testHighlightingWithWildcardName() throws IOException { + // test the kibana case with * as fieldname that will try highlight all fields including meta fields + XContentBuilder mappings = jsonBuilder(); + mappings.startObject(); + mappings.startObject("type") + .startObject("properties") + .startObject("text") + .field("type", "string") + .field("analyzer", "keyword") + .field("index_options", "offsets") + .field("term_vector", "with_positions_offsets") + .endObject() + .endObject() + .endObject(); + mappings.endObject(); + assertAcked(prepareCreate("test") + .addMapping("type", mappings)); + ensureYellow(); + client().prepareIndex("test", "type", "1") + .setSource(jsonBuilder().startObject().field("text", "text").endObject()) + .get(); + refresh(); + String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"}); + SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get(); + assertHighlight(search, 0, "text", 0, equalTo("text")); + } + + @Test + public void testPlainHighlighterWithLongUnanalyzedStringTerm() throws IOException { + XContentBuilder mappings = jsonBuilder(); + mappings.startObject(); + mappings.startObject("type") + .startObject("properties") + .startObject("long_text") + .field("type", "string") + .field("analyzer", "keyword") + .field("index_options", "offsets") + .field("term_vector", "with_positions_offsets") + .field("ignore_above", 1) + .endObject() + .startObject("text") + .field("type", "string") + .field("analyzer", "keyword") + .field("index_options", "offsets") + .field("term_vector", "with_positions_offsets") + .endObject() + .endObject() + .endObject(); + mappings.endObject(); + assertAcked(prepareCreate("test") + .addMapping("type", mappings)); + ensureYellow(); + // crate a term that is larger than the allowed 32766, index it and then try highlight on it + // the search request should still succeed + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < 32767; i++) { + builder.append('a'); + } + client().prepareIndex("test", "type", "1") + .setSource(jsonBuilder().startObject().field("long_text", builder.toString()).field("text", "text").endObject()) + .get(); + refresh(); + String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"}); + SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get(); + assertHighlight(search, 0, "text", 0, equalTo("text")); + search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("long_text").highlighterType(highlighter)).get(); + assertNoFailures(search); + assertThat(search.getHits().getAt(0).getHighlightFields().size(), equalTo(0)); + } + + @Test + public void testHighlightingWhenFieldsAreNotStoredThereIsNoSource() throws IOException { + XContentBuilder mappings = jsonBuilder(); + mappings.startObject(); + mappings.startObject("type") + .startObject("_source") + .field("enabled", false) + .endObject() + .startObject("properties") + .startObject("unstored_field") + .field("index_options", "offsets") + .field("term_vector", "with_positions_offsets") + .field("type", "string") + .field("store", "no") + .endObject() + .startObject("text") + .field("index_options", "offsets") + .field("term_vector", "with_positions_offsets") + .field("type", "string") + .field("store", "yes") + .endObject() + .endObject() + .endObject(); + mappings.endObject(); + assertAcked(prepareCreate("test") + .addMapping("type", mappings)); + ensureYellow(); + client().prepareIndex("test", "type", "1") + .setSource(jsonBuilder().startObject().field("unstored_text", "text").field("text", "text").endObject()) + .get(); + refresh(); + String highlighter = randomFrom(new String[]{"plain", "postings", "fvh"}); + SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("*").highlighterType(highlighter)).get(); + assertHighlight(search, 0, "text", 0, equalTo("text")); + search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))).addHighlightedField(new Field("unstored_text")).get(); + assertNoFailures(search); + assertThat(search.getHits().getAt(0).getHighlightFields().size(), equalTo(0)); + } + + @Test // see #3486 public void testHighTermFrequencyDoc() throws IOException { @@ -1171,12 +1283,11 @@ public void testFastVectorHighlighterShouldFailIfNoTermVectors() throws Exceptio RestStatus.BAD_REQUEST, containsString("the field [title] should be indexed with term vector with position offsets to be used with fast vector highlighter")); - assertFailures(client().prepareSearch() + //should not fail if there is a wildcard + assertNoFailures(client().prepareSearch() .setQuery(matchPhraseQuery("title", "this is a test")) .addHighlightedField("tit*", 50, 1, 10) - .setHighlighterType("fast-vector-highlighter"), - RestStatus.BAD_REQUEST, - containsString("the field [title] should be indexed with term vector with position offsets to be used with fast vector highlighter")); + .setHighlighterType("fast-vector-highlighter").get()); } @Test @@ -2169,12 +2280,11 @@ public void testPostingsHighlighterShouldFailIfNoOffsets() throws Exception { RestStatus.BAD_REQUEST, containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter")); - assertFailures(client().prepareSearch() + //should not fail if there is a wildcard + assertNoFailures(client().prepareSearch() .setQuery(matchQuery("title", "this is a test")) .addHighlightedField("tit*") - .setHighlighterType("postings"), - RestStatus.BAD_REQUEST, - containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter")); + .setHighlighterType("postings").get()); } @Test