Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix rounding composite aggs on sorted index #57867

Merged
merged 3 commits into from
Jun 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.SearchAfterSortedDocQuery;
Expand All @@ -31,12 +32,15 @@
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.RoaringDocIdSet;
import org.elasticsearch.common.lease.Releasables;
Expand Down Expand Up @@ -231,6 +235,11 @@ private Sort buildIndexSortPrefix(LeafReaderContext context) throws IOException
break;
}
sortFields.add(indexSortField);
if (sourceConfig.valuesSource() instanceof RoundingValuesSource) {
// the rounding "squashes" many values together, that breaks the ordering of sub-values
// so we ignore subsequent source even if they match the index sort.
break;
}
}
return sortFields.isEmpty() ? null : new Sort(sortFields.toArray(new SortField[0]));
}
Expand All @@ -256,6 +265,76 @@ private int computeSortPrefixLen(Sort indexSortPrefix) {
}
}

/**
* Rewrites the provided {@link Sort} to apply rounding on {@link SortField} that target
* {@link RoundingValuesSource}.
*/
private Sort applySortFieldRounding(Sort sort) {
SortField[] sortFields = new SortField[sort.getSort().length];
for (int i = 0; i < sort.getSort().length; i++) {
if (sourceConfigs[i].valuesSource() instanceof RoundingValuesSource) {
LongUnaryOperator round = ((RoundingValuesSource) sourceConfigs[i].valuesSource())::round;
final SortedNumericSortField delegate = (SortedNumericSortField) sort.getSort()[i];
sortFields[i] = new SortedNumericSortField(delegate.getField(), delegate.getNumericType(), delegate.getReverse()) {
@Override
public boolean equals(Object obj) {
return delegate.equals(obj);
}

@Override
public int hashCode() {
return delegate.hashCode();
}

@Override
public FieldComparator<?> getComparator(int numHits, int sortPos) {
return new FieldComparator.LongComparator(1, delegate.getField(), (Long) missingValue) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
NumericDocValues dvs = SortedNumericSelector.wrap(DocValues.getSortedNumeric(context.reader(), field),
delegate.getSelector(), delegate.getNumericType());
return new NumericDocValues() {
@Override
public long longValue() throws IOException {
return round.applyAsLong(dvs.longValue());
}

@Override
public boolean advanceExact(int target) throws IOException {
return dvs.advanceExact(target);
}

@Override
public int docID() {
return dvs.docID();
}

@Override
public int nextDoc() throws IOException {
return dvs.nextDoc();
}

@Override
public int advance(int target) throws IOException {
return dvs.advance(target);
}

@Override
public long cost() {
return dvs.cost();
}
};
}
};
}
};
} else {
sortFields[i] = sort.getSort()[i];
}
}
return new Sort(sortFields);
}

private void processLeafFromQuery(LeafReaderContext ctx, Sort indexSortPrefix) throws IOException {
DocValueFormat[] formats = new DocValueFormat[indexSortPrefix.getSort().length];
for (int i = 0; i < formats.length; i++) {
Expand All @@ -265,11 +344,11 @@ private void processLeafFromQuery(LeafReaderContext ctx, Sort indexSortPrefix) t
Arrays.copyOfRange(rawAfterKey.values(), 0, formats.length));
if (indexSortPrefix.getSort().length < sources.length) {
// include all docs that belong to the partial bucket
fieldDoc.doc = 0;
fieldDoc.doc = -1;
}
BooleanQuery newQuery = new BooleanQuery.Builder()
.add(context.query(), BooleanClause.Occur.MUST)
.add(new SearchAfterSortedDocQuery(indexSortPrefix, fieldDoc), BooleanClause.Occur.FILTER)
.add(new SearchAfterSortedDocQuery(applySortFieldRounding(indexSortPrefix), fieldDoc), BooleanClause.Occur.FILTER)
.build();
Weight weight = context.searcher().createWeight(context.searcher().rewrite(newQuery), ScoreMode.COMPLETE_NO_SCORES, 1f);
Scorer scorer = weight.scorer(ctx);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ private int compareCurrentWithAfter() {
for (int i = 0; i < arrays.length; i++) {
int cmp = arrays[i].compareCurrentWithAfter();
if (cmp != 0) {
return cmp;
return cmp > 0 ? i+1 : -(i+1);
}
}
return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1948,6 +1948,69 @@ public void testEarlyTermination() throws Exception {
);
}

public void testIndexSortWithDuplicate() throws Exception {
final List<Map<String, List<Object>>> dataset = new ArrayList<>();
dataset.addAll(
Arrays.asList(
createDocument("date", asLong("2020-06-03T00:53:10"), "keyword", "37640"),
createDocument("date", asLong("2020-06-03T00:55:10"), "keyword", "90640"),
createDocument("date", asLong("2020-06-03T01:10:10"), "keyword", "22640"),
createDocument("date", asLong("2020-06-03T01:15:10"), "keyword", "91640"),
createDocument("date", asLong("2020-06-03T01:21:10"), "keyword", "11640"),
createDocument("date", asLong("2020-06-03T01:22:10"), "keyword", "90640"),
createDocument("date", asLong("2020-06-03T01:54:10"), "keyword", "31640")
)
);

for (SortOrder order : SortOrder.values()) {
executeTestCase(true, false, new MatchAllDocsQuery(),
dataset,
() ->
new CompositeAggregationBuilder("name",
Arrays.asList(
new DateHistogramValuesSourceBuilder("date")
.field("date")
.order(order)
.calendarInterval(DateHistogramInterval.days(1)),
new TermsValuesSourceBuilder("keyword").field("keyword")
)).size(3),
(result) -> {
assertEquals(3, result.getBuckets().size());
assertEquals("{date=1591142400000, keyword=31640}", result.afterKey().toString());
assertEquals("{date=1591142400000, keyword=11640}", result.getBuckets().get(0).getKeyAsString());
assertEquals(1L, result.getBuckets().get(0).getDocCount());
assertEquals("{date=1591142400000, keyword=22640}", result.getBuckets().get(1).getKeyAsString());
assertEquals(1L, result.getBuckets().get(1).getDocCount());
assertEquals("{date=1591142400000, keyword=31640}", result.getBuckets().get(2).getKeyAsString());
assertEquals(1L, result.getBuckets().get(2).getDocCount());
}
);

executeTestCase(true, false, new MatchAllDocsQuery(),
dataset,
() ->
new CompositeAggregationBuilder("name",
Arrays.asList(
new DateHistogramValuesSourceBuilder("date")
.field("date")
.order(order)
.calendarInterval(DateHistogramInterval.days(1)),
new TermsValuesSourceBuilder("keyword").field("keyword")
)).aggregateAfter(createAfterKey("date", 1591142400000L, "keyword", "31640")).size(3),
(result) -> {
assertEquals(3, result.getBuckets().size());
assertEquals("{date=1591142400000, keyword=91640}", result.afterKey().toString());
assertEquals("{date=1591142400000, keyword=37640}", result.getBuckets().get(0).getKeyAsString());
assertEquals(1L, result.getBuckets().get(0).getDocCount());
assertEquals("{date=1591142400000, keyword=90640}", result.getBuckets().get(1).getKeyAsString());
assertEquals(2L, result.getBuckets().get(1).getDocCount());
assertEquals("{date=1591142400000, keyword=91640}", result.getBuckets().get(2).getKeyAsString());
assertEquals(1L, result.getBuckets().get(2).getDocCount());
}
);
}
}

private void testSearchCase(List<Query> queries,
List<Map<String, List<Object>>> dataset,
Supplier<CompositeAggregationBuilder> create,
Expand Down Expand Up @@ -2086,9 +2149,6 @@ private static Sort buildIndexSort(List<CompositeValuesSourceBuilder<?>> sources
break;
}
sortFields.add(sortField);
if (sortField instanceof SortedNumericSortField && ((SortedNumericSortField) sortField).getType() == SortField.Type.LONG) {
break;
}
}
while (remainingFieldTypes.size() > 0 && randomBoolean()) {
// Add extra unused sorts
Expand All @@ -2102,7 +2162,7 @@ private static Sort buildIndexSort(List<CompositeValuesSourceBuilder<?>> sources
}
return sortFields.size() > 0 ? new Sort(sortFields.toArray(SortField[]::new)) : null;
}

private static SortField sortFieldFrom(MappedFieldType type) {
if (type instanceof KeywordFieldMapper.KeywordFieldType) {
return new SortedSetSortField(type.name(), false);
Expand Down