Skip to content

Commit

Permalink
Consider query when optimizing date rounding (#63403)
Browse files Browse the repository at this point in the history
Before this change we inspected the index when optimizing
`date_histogram` aggregations, precalculating the divisions for the
buckets for the entire range of dates on the index so long as there
aren't a ton of these buckets. This works very well when you query all
of the dates in the index which is quite common - after all, folks
frequently want to query a week of data and have daily indices.

But it doesn't work as well when the index is much larger than the
query. This is quite common when dumping data into ES just to
investigate it but less common in the traditional time series use case.
But even there it still happens, it is just less impactful. Consider
the default query produced by Kibana's Discover app: a range of 15
minutes and a interval of 30 seconds. This optimization saves something
like 3 to 12 nanoseconds per document, so that 15 minutes would have to
have hundreds of millions of documents for it to be impactful.

Anyway, this commit takes the query into account when precalculating the
buckets. Mostly this is good when you have "dirty data". Immagine
loading 80 billion docs in an index to investigate them. Most of them
have dates around 2015 and 2016 but some have dates in 1970 and
others have dates in 2030. These outlier dates are "dirty" "garbage".
Well, without this change a `date_histogram` across many of these docs
is significantly slowed down because we don't precalculate the range due
to the outliers. That's just rude! So this change takes the query into
account.

The bulk of the code change here is plumbing the query into place. It
turns out that its a *ton* of plumbing, so instead of just adding a
`Query` member in hundreds of args replace `QueryShardContext` with a
new `AggregationContext` which does two things:
1. Has the top level `Query`.
2. Exposes just the parts of `QueryShardContext` that we actually need
   to run aggregation. This lets us simplify a few tests now and will
   let us simplify many, many tests later.
  • Loading branch information
nik9000 committed Oct 12, 2020
1 parent b1c80d5 commit 4aaffc6
Show file tree
Hide file tree
Showing 157 changed files with 1,506 additions and 1,220 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
Expand All @@ -49,7 +49,7 @@
* doesn't support any "server" side things like
* {@linkplain Writeable#writeTo(StreamOutput)},
* {@linkplain AggregationBuilder#rewrite(QueryRewriteContext)}, or
* {@linkplain AbstractAggregationBuilder#build(QueryShardContext, AggregatorFactory)}.
* {@linkplain AbstractAggregationBuilder#build(AggregationContext, AggregatorFactory)}.
*/
public class StringStatsAggregationBuilder extends ValuesSourceAggregationBuilder<StringStatsAggregationBuilder> {
public static final String NAME = "string_stats";
Expand Down Expand Up @@ -102,7 +102,7 @@ public BucketCardinality bucketCardinality() {
}

@Override
protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, ValuesSourceConfig config,
protected ValuesSourceAggregatorFactory innerBuild(AggregationContext context, ValuesSourceConfig config,
AggregatorFactory parent, Builder subFactoriesBuilder) throws IOException {
throw new UnsupportedOperationException();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortBuilder;

Expand All @@ -44,7 +44,7 @@
* doesn't support any "server" side things like
* {@linkplain Writeable#writeTo(StreamOutput)},
* {@linkplain AggregationBuilder#rewrite(QueryRewriteContext)}, or
* {@linkplain AbstractAggregationBuilder#build(QueryShardContext, AggregatorFactory)}.
* {@linkplain AbstractAggregationBuilder#build(AggregationContext, AggregatorFactory)}.
*/
public class TopMetricsAggregationBuilder extends AbstractAggregationBuilder<TopMetricsAggregationBuilder> {
public static final String NAME = "top_metrics";
Expand Down Expand Up @@ -100,7 +100,7 @@ public BucketCardinality bucketCardinality() {
}

@Override
protected AggregatorFactory doBuild(QueryShardContext queryShardContext, AggregatorFactory parent, Builder subfactoriesBuilder)
protected AggregatorFactory doBuild(AggregationContext context, AggregatorFactory parent, Builder subfactoriesBuilder)
throws IOException {
throw new UnsupportedOperationException();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ArrayValuesSourceAggregationBuilder;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;

Expand Down Expand Up @@ -76,11 +76,11 @@ public MultiValueMode multiValueMode() {
}

@Override
protected MatrixStatsAggregatorFactory innerBuild(QueryShardContext queryShardContext,
protected MatrixStatsAggregatorFactory innerBuild(AggregationContext context,
Map<String, ValuesSourceConfig> configs,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
return new MatrixStatsAggregatorFactory(name, configs, multiValueMode, queryShardContext, parent, subFactoriesBuilder, metadata);
return new MatrixStatsAggregatorFactory(name, configs, multiValueMode, context, parent, subFactoriesBuilder, metadata);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
*/
package org.elasticsearch.search.aggregations.matrix.stats;

import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ArrayValuesSourceAggregatorFactory;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
Expand All @@ -41,11 +41,11 @@ final class MatrixStatsAggregatorFactory extends ArrayValuesSourceAggregatorFact
MatrixStatsAggregatorFactory(String name,
Map<String, ValuesSourceConfig> configs,
MultiValueMode multiValueMode,
QueryShardContext queryShardContext,
AggregationContext context,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metadata) throws IOException {
super(name, configs, queryShardContext, parent, subFactoriesBuilder, metadata);
super(name, configs, context, parent, subFactoriesBuilder, metadata);
this.multiValueMode = multiValueMode;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationInitializationException;
import org.elasticsearch.search.aggregations.AggregatorFactories;
Expand Down Expand Up @@ -190,24 +189,24 @@ public Map<String, Object> missingMap() {
}

@Override
protected final ArrayValuesSourceAggregatorFactory doBuild(QueryShardContext queryShardContext, AggregatorFactory parent,
protected final ArrayValuesSourceAggregatorFactory doBuild(AggregationContext context, AggregatorFactory parent,
Builder subFactoriesBuilder) throws IOException {
Map<String, ValuesSourceConfig> configs = resolveConfig(queryShardContext);
ArrayValuesSourceAggregatorFactory factory = innerBuild(queryShardContext, configs, parent, subFactoriesBuilder);
Map<String, ValuesSourceConfig> configs = resolveConfig(context);
ArrayValuesSourceAggregatorFactory factory = innerBuild(context, configs, parent, subFactoriesBuilder);
return factory;
}

protected Map<String, ValuesSourceConfig> resolveConfig(QueryShardContext queryShardContext) {
protected Map<String, ValuesSourceConfig> resolveConfig(AggregationContext context) {
HashMap<String, ValuesSourceConfig> configs = new HashMap<>();
for (String field : fields) {
ValuesSourceConfig config = ValuesSourceConfig.resolveUnregistered(queryShardContext, userValueTypeHint, field, null,
ValuesSourceConfig config = ValuesSourceConfig.resolveUnregistered(context, userValueTypeHint, field, null,
missingMap.get(field), null, format, CoreValuesSourceType.BYTES);
configs.put(field, config);
}
return configs;
}

protected abstract ArrayValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext,
protected abstract ArrayValuesSourceAggregatorFactory innerBuild(AggregationContext context,
Map<String, ValuesSourceConfig> configs,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder) throws IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

package org.elasticsearch.search.aggregations.support;

import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
Expand All @@ -36,10 +35,10 @@ public abstract class ArrayValuesSourceAggregatorFactory
protected Map<String, ValuesSourceConfig> configs;

public ArrayValuesSourceAggregatorFactory(String name, Map<String, ValuesSourceConfig> configs,
QueryShardContext queryShardContext, AggregatorFactory parent,
AggregationContext context, AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metadata) throws IOException {
super(name, queryShardContext, parent, subFactoriesBuilder, metadata);
super(name, context, parent, subFactoriesBuilder, metadata);
this.configs = configs;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.join.mapper.ParentIdFieldMapper;
import org.elasticsearch.join.mapper.ParentJoinFieldMapper;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
Expand Down Expand Up @@ -101,36 +101,36 @@ public BucketCardinality bucketCardinality() {
return BucketCardinality.ONE;
}

protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext,
protected ValuesSourceAggregatorFactory innerBuild(AggregationContext context,
ValuesSourceConfig config,
AggregatorFactory parent,
Builder subFactoriesBuilder) throws IOException {
return new ChildrenAggregatorFactory(name, config, childFilter, parentFilter, queryShardContext, parent,
return new ChildrenAggregatorFactory(name, config, childFilter, parentFilter, context, parent,
subFactoriesBuilder, metadata);
}

@Override
protected ValuesSourceConfig resolveConfig(QueryShardContext queryShardContext) {
protected ValuesSourceConfig resolveConfig(AggregationContext context) {
ValuesSourceConfig config;

ParentJoinFieldMapper parentJoinFieldMapper = ParentJoinFieldMapper.getMapper(queryShardContext.getMapperService());
ParentJoinFieldMapper parentJoinFieldMapper = ParentJoinFieldMapper.getMapper(context::getFieldType, context::getMapper);
if (parentJoinFieldMapper == null) {
// Unmapped field case
config = ValuesSourceConfig.resolveUnmapped(defaultValueSourceType(), queryShardContext);
config = ValuesSourceConfig.resolveUnmapped(defaultValueSourceType(), context);
return config;
}

ParentIdFieldMapper parentIdFieldMapper = parentJoinFieldMapper.getParentIdFieldMapper(childType, false);
if (parentIdFieldMapper == null) {
// Unmapped field case
config = ValuesSourceConfig.resolveUnmapped(defaultValueSourceType(), queryShardContext);
config = ValuesSourceConfig.resolveUnmapped(defaultValueSourceType(), context);
return config;
}

parentFilter = parentIdFieldMapper.getParentFilter();
childFilter = parentIdFieldMapper.getChildFilter(childType);
MappedFieldType fieldType = parentIdFieldMapper.fieldType();
config = ValuesSourceConfig.resolveFieldOnly(fieldType, queryShardContext);
config = ValuesSourceConfig.resolveFieldOnly(fieldType, context);
return config;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
package org.elasticsearch.join.aggregations;

import org.apache.lucene.search.Query;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.NonCollectingAggregator;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
Expand All @@ -48,7 +48,7 @@ public ChildrenAggregatorFactory(String name,
ValuesSourceConfig config,
Query childFilter,
Query parentFilter,
QueryShardContext context,
AggregationContext context,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metadata) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.join.mapper.ParentIdFieldMapper;
import org.elasticsearch.join.mapper.ParentJoinFieldMapper;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
Expand Down Expand Up @@ -102,27 +102,27 @@ public BucketCardinality bucketCardinality() {
}

@Override
protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext,
protected ValuesSourceAggregatorFactory innerBuild(AggregationContext context,
ValuesSourceConfig config,
AggregatorFactory parent,
Builder subFactoriesBuilder) throws IOException {
return new ParentAggregatorFactory(name, config, childFilter, parentFilter, queryShardContext, parent,
return new ParentAggregatorFactory(name, config, childFilter, parentFilter, context, parent,
subFactoriesBuilder, metadata);
}

@Override
protected ValuesSourceConfig resolveConfig(QueryShardContext queryShardContext) {
protected ValuesSourceConfig resolveConfig(AggregationContext context) {
ValuesSourceConfig config;
ParentJoinFieldMapper parentJoinFieldMapper = ParentJoinFieldMapper.getMapper(queryShardContext.getMapperService());
ParentJoinFieldMapper parentJoinFieldMapper = ParentJoinFieldMapper.getMapper(context::getFieldType, context::getMapper);
ParentIdFieldMapper parentIdFieldMapper = parentJoinFieldMapper.getParentIdFieldMapper(childType, false);
if (parentIdFieldMapper != null) {
parentFilter = parentIdFieldMapper.getParentFilter();
childFilter = parentIdFieldMapper.getChildFilter(childType);
MappedFieldType fieldType = parentIdFieldMapper.fieldType();
config = ValuesSourceConfig.resolveFieldOnly(fieldType, queryShardContext);
config = ValuesSourceConfig.resolveFieldOnly(fieldType, context);
} else {
// unmapped case
config = ValuesSourceConfig.resolveUnmapped(defaultValueSourceType(), queryShardContext);
config = ValuesSourceConfig.resolveUnmapped(defaultValueSourceType(), context);
}
return config;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
package org.elasticsearch.join.aggregations;

import org.apache.lucene.search.Query;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.AggregatorFactory;
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.NonCollectingAggregator;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals;
import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory;
Expand All @@ -48,11 +48,11 @@ public ParentAggregatorFactory(String name,
ValuesSourceConfig config,
Query childFilter,
Query parentFilter,
QueryShardContext queryShardContext,
AggregationContext context,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder,
Map<String, Object> metadata) throws IOException {
super(name, config, queryShardContext, parent, subFactoriesBuilder, metadata);
super(name, config, context, parent, subFactoriesBuilder, metadata);

this.childFilter = childFilter;
this.parentFilter = parentFilter;
Expand Down
Loading

0 comments on commit 4aaffc6

Please sign in to comment.