From c4bfdb773a13fb29df96df9c683dbcd1e21e4583 Mon Sep 17 00:00:00 2001 From: iverase Date: Mon, 28 Oct 2019 10:08:45 +0100 Subject: [PATCH 01/24] Add HistogramField. --- .../fielddata/AtomicHistogramFieldData.java | 32 ++ .../index/fielddata/HistogramValue.java | 48 ++ .../index/fielddata/HistogramValues.java | 41 ++ .../fielddata/IndexHistogramFieldData.java | 34 ++ .../AbstractHDRPercentilesAggregator.java | 48 +- .../AbstractTDigestPercentilesAggregator.java | 39 +- .../metrics/HDRPercentileRanksAggregator.java | 7 +- .../HDRPercentileRanksAggregatorFactory.java | 6 +- .../metrics/HDRPercentilesAggregator.java | 8 +- .../HDRPercentilesAggregatorFactory.java | 43 +- .../PercentileRanksAggregationBuilder.java | 6 +- .../PercentilesAggregationBuilder.java | 39 +- .../TDigestPercentileRanksAggregator.java | 3 +- ...igestPercentileRanksAggregatorFactory.java | 6 +- .../metrics/TDigestPercentilesAggregator.java | 4 +- .../TDigestPercentilesAggregatorFactory.java | 27 +- .../aggregations/support/ValuesSource.java | 37 ++ .../support/ValuesSourceConfig.java | 14 + .../support/ValuesSourceType.java | 3 +- .../xpack/analytics/AnalyticsPlugin.java | 12 +- .../mapper/HistogramFieldMapper.java | 456 ++++++++++++++++++ ...regatedPercentileRanksAggregatorTests.java | 90 ++++ ...eAggregatedPercentilesAggregatorTests.java | 125 +++++ .../mapper/HistogramAggregationTests.java | 239 +++++++++ .../mapper/HistogramFieldMapperTests.java | 380 +++++++++++++++ ...regatedPercentileRanksAggregatorTests.java | 89 ++++ ...eAggregatedPercentilesAggregatorTests.java | 121 +++++ 27 files changed, 1873 insertions(+), 84 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java create mode 100644 server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java create mode 100644 server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java create mode 100644 
server/src/main/java/org/elasticsearch/index/fielddata/IndexHistogramFieldData.java create mode 100644 x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java create mode 100644 x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java create mode 100644 x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java create mode 100644 x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java create mode 100644 x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java create mode 100644 x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java create mode 100644 x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java new file mode 100644 index 0000000000000..8eeaacf44061e --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java @@ -0,0 +1,32 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.fielddata; + + +/** + * {@link AtomicFieldData} specialization for histogram data. + */ +public interface AtomicHistogramFieldData extends AtomicFieldData { + + /** + * Return Histogram values. + */ + HistogramValues getHistogramValues(); + +} diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java new file mode 100644 index 0000000000000..0f35f82bb703a --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValue.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata; + +import java.io.IOException; + +/** + * Per-document histogram value. 
Every value of the histogram consists of + * a value and a count. + */ +public abstract class HistogramValue { + + /** + * Advance this instance to the next value of the histogram + * @return true if there is a next value + */ + public abstract boolean next() throws IOException; + + /** + * The current value of the histogram + * @return the current value of the histogram + */ + public abstract double value(); + + /** + * The current count of the histogram + * @return the current count of the histogram + */ + public abstract int count(); + +} diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java new file mode 100644 index 0000000000000..8e0e0cbc9181a --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java @@ -0,0 +1,41 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata; + +import java.io.IOException; + +/** + * Per-segment histogram values. 
+ */ +public abstract class HistogramValues { + + /** + * Advance this instance to the given document id + * @return true if there is a value for this document + */ + public abstract boolean advanceExact(int doc) throws IOException; + + /** + * Get the {@link HistogramValue} associated with the current document. + * The returned {@link HistogramValue} might be reused across calls. + */ + public abstract HistogramValue histogram(); + +} diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/IndexHistogramFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/IndexHistogramFieldData.java new file mode 100644 index 0000000000000..0a8160f427f43 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/fielddata/IndexHistogramFieldData.java @@ -0,0 +1,34 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata; + + +import org.elasticsearch.index.Index; +import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData; + +/** + * Specialization of {@link IndexFieldData} for histograms. 
+ */ +public abstract class IndexHistogramFieldData extends DocValuesIndexFieldData implements IndexFieldData { + + public IndexHistogramFieldData(Index index, String fieldName) { + super(index, fieldName); + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java index 0848a494c7454..e5e7bd8d2b9e2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java @@ -26,12 +26,13 @@ import org.elasticsearch.common.util.ArrayUtils; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.ObjectArray; +import org.elasticsearch.index.fielddata.HistogramValue; +import org.elasticsearch.index.fielddata.HistogramValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; -import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregator; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; @@ -47,13 +48,13 @@ private static int indexOfKey(double[] keys, double key) { } protected final double[] keys; - protected final ValuesSource.Numeric valuesSource; + protected final ValuesSource valuesSource; protected final DocValueFormat format; protected ObjectArray states; protected final int numberOfSignificantValueDigits; protected final boolean keyed; - AbstractHDRPercentilesAggregator(String name, 
ValuesSource.Numeric valuesSource, SearchContext context, Aggregator parent, + AbstractHDRPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] keys, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, List pipelineAggregators, Map metaData) throws IOException { super(name, context, parent, pipelineAggregators, metaData); @@ -77,7 +78,17 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, return LeafBucketCollector.NO_OP_COLLECTOR; } final BigArrays bigArrays = context.bigArrays(); - final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); + if (valuesSource instanceof ValuesSource.Histogram) { + final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx); + return collectHistogramValues(values, bigArrays, sub); + } else { + final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx); + return collectNumeric(values, bigArrays, sub); + } + + } + + private LeafBucketCollector collectNumeric(final SortedNumericDoubleValues values, final BigArrays bigArrays, final LeafBucketCollector sub) { return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { @@ -106,6 +117,35 @@ public void collect(int doc, long bucket) throws IOException { }; } + private LeafBucketCollector collectHistogramValues(final HistogramValues values, final BigArrays bigArrays, final LeafBucketCollector sub) { + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + states = bigArrays.grow(states, bucket + 1); + DoubleHistogram state = states.get(bucket); + if (state == null) { + state = new DoubleHistogram(numberOfSignificantValueDigits); + // Set the histogram to autosize so it can resize itself as + // the data range increases. 
Resize operations should be + // rare as the histogram buckets are exponential (on the top + // level). In the future we could expose the range as an + // option on the request so the histogram can be fixed at + // initialisation and doesn't need resizing. + state.setAutoResize(true); + states.set(bucket, state); + } + + if (values.advanceExact(doc)) { + final HistogramValue sketch = values.histogram(); + while(sketch.next()) { + state.recordValueWithCount(sketch.value(), sketch.count()); + } + } + } + }; + } + + @Override public boolean hasMetric(String name) { return indexOfKey(keys, Double.parseDouble(name)) >= 0; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java index 15ad622fce58c..b7d90bfbe32a2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java @@ -25,6 +25,8 @@ import org.elasticsearch.common.util.ArrayUtils; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.ObjectArray; +import org.elasticsearch.index.fielddata.HistogramValue; +import org.elasticsearch.index.fielddata.HistogramValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; @@ -45,13 +47,13 @@ private static int indexOfKey(double[] keys, double key) { } protected final double[] keys; - protected final ValuesSource.Numeric valuesSource; + protected final ValuesSource valuesSource; protected final DocValueFormat formatter; protected ObjectArray states; protected final double compression; protected final boolean keyed; - AbstractTDigestPercentilesAggregator(String name, 
ValuesSource.Numeric valuesSource, SearchContext context, Aggregator parent, + AbstractTDigestPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] keys, double compression, boolean keyed, DocValueFormat formatter, List pipelineAggregators, Map metaData) throws IOException { super(name, context, parent, pipelineAggregators, metaData); @@ -75,7 +77,17 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, return LeafBucketCollector.NO_OP_COLLECTOR; } final BigArrays bigArrays = context.bigArrays(); - final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); + if (valuesSource instanceof ValuesSource.Histogram) { + final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx); + return collectHistogramValues(values, bigArrays, sub); + } else { + final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx); + return collectNumeric(values, bigArrays, sub); + } + + } + + private LeafBucketCollector collectNumeric(final SortedNumericDoubleValues values, final BigArrays bigArrays, final LeafBucketCollector sub) { return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { @@ -97,6 +109,27 @@ public void collect(int doc, long bucket) throws IOException { }; } + private LeafBucketCollector collectHistogramValues(final HistogramValues values, final BigArrays bigArrays, final LeafBucketCollector sub) { + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + states = bigArrays.grow(states, bucket + 1); + TDigestState state = states.get(bucket); + if (state == null) { + state = new TDigestState(compression); + states.set(bucket, state); + } + + if (values.advanceExact(doc)) { + final HistogramValue sketch = values.histogram(); + while(sketch.next()) { + state.add(sketch.value(), 
sketch.count()); + } + } + } + }; + } + @Override public boolean hasMetric(String name) { return indexOfKey(keys, Double.parseDouble(name)) >= 0; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java index 881d7a4bf4f4d..3a51ef54a0289 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java @@ -23,6 +23,7 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.internal.SearchContext; @@ -32,9 +33,9 @@ class HDRPercentileRanksAggregator extends AbstractHDRPercentilesAggregator { - HDRPercentileRanksAggregator(String name, Numeric valuesSource, SearchContext context, Aggregator parent, - double[] percents, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat format, - List pipelineAggregators, Map metaData) throws IOException { + HDRPercentileRanksAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, + double[] percents, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat format, + List pipelineAggregators, Map metaData) throws IOException { super(name, valuesSource, context, parent, percents, numberOfSignificantValueDigits, keyed, format, pipelineAggregators, metaData); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java 
b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java index dd191e8c457f2..81919404dd665 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java @@ -35,13 +35,13 @@ import java.util.Map; class HDRPercentileRanksAggregatorFactory - extends ValuesSourceAggregatorFactory { + extends ValuesSourceAggregatorFactory { private final double[] values; private final int numberOfSignificantValueDigits; private final boolean keyed; - HDRPercentileRanksAggregatorFactory(String name, ValuesSourceConfig config, double[] values, + HDRPercentileRanksAggregatorFactory(String name, ValuesSourceConfig config, double[] values, int numberOfSignificantValueDigits, boolean keyed, QueryShardContext queryShardContext, AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder, Map metaData) throws IOException { @@ -61,7 +61,7 @@ protected Aggregator createUnmapped(SearchContext searchContext, } @Override - protected Aggregator doCreateInternal(Numeric valuesSource, + protected Aggregator doCreateInternal(ValuesSource valuesSource, SearchContext searchContext, Aggregator parent, boolean collectsFromSingleBucket, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java index f1a4a03b24bb1..b3720f63d01d1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java @@ -23,7 +23,7 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; import 
org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; +import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -32,9 +32,9 @@ class HDRPercentilesAggregator extends AbstractHDRPercentilesAggregator { - HDRPercentilesAggregator(String name, Numeric valuesSource, SearchContext context, Aggregator parent, double[] percents, - int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, - List pipelineAggregators, Map metaData) throws IOException { + HDRPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] percents, + int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, + List pipelineAggregators, Map metaData) throws IOException { super(name, valuesSource, context, parent, percents, numberOfSignificantValueDigits, keyed, formatter, pipelineAggregators, metaData); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java index de5af206c5398..6f69afcb552c2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java @@ -25,7 +25,6 @@ import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 
import org.elasticsearch.search.internal.SearchContext; @@ -34,21 +33,21 @@ import java.util.List; import java.util.Map; -class HDRPercentilesAggregatorFactory extends ValuesSourceAggregatorFactory { +class HDRPercentilesAggregatorFactory extends ValuesSourceAggregatorFactory { private final double[] percents; private final int numberOfSignificantValueDigits; private final boolean keyed; HDRPercentilesAggregatorFactory(String name, - ValuesSourceConfig config, - double[] percents, - int numberOfSignificantValueDigits, - boolean keyed, - QueryShardContext queryShardContext, - AggregatorFactory parent, - AggregatorFactories.Builder subFactoriesBuilder, - Map metaData) throws IOException { + ValuesSourceConfig config, + double[] percents, + int numberOfSignificantValueDigits, + boolean keyed, + QueryShardContext queryShardContext, + AggregatorFactory parent, + AggregatorFactories.Builder subFactoriesBuilder, + Map metaData) throws IOException { super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData); this.percents = percents; this.numberOfSignificantValueDigits = numberOfSignificantValueDigits; @@ -57,23 +56,23 @@ class HDRPercentilesAggregatorFactory extends ValuesSourceAggregatorFactory pipelineAggregators, - Map metaData) - throws IOException { + Aggregator parent, + List pipelineAggregators, + Map metaData) + throws IOException { return new HDRPercentilesAggregator(name, null, searchContext, parent, percents, numberOfSignificantValueDigits, keyed, - config.format(), pipelineAggregators, metaData); + config.format(), pipelineAggregators, metaData); } @Override - protected Aggregator doCreateInternal(Numeric valuesSource, - SearchContext searchContext, - Aggregator parent, - boolean collectsFromSingleBucket, - List pipelineAggregators, - Map metaData) throws IOException { + protected Aggregator doCreateInternal(ValuesSource valuesSource, + SearchContext searchContext, + Aggregator parent, + boolean collectsFromSingleBucket, + List 
pipelineAggregators, + Map metaData) throws IOException { return new HDRPercentilesAggregator(name, valuesSource, searchContext, parent, percents, numberOfSignificantValueDigits, keyed, - config.format(), pipelineAggregators, metaData); + config.format(), pipelineAggregators, metaData); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java index d1a04667c6be6..91c57dc6949f0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java @@ -47,7 +47,7 @@ import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; -public class PercentileRanksAggregationBuilder extends LeafOnly { +public class PercentileRanksAggregationBuilder extends LeafOnly { public static final String NAME = PercentileRanks.TYPE_NAME; public static final ParseField VALUES_FIELD = new ParseField("values"); @@ -80,7 +80,7 @@ private static class HDROptions { static { PARSER = new ConstructingObjectParser<>(PercentileRanksAggregationBuilder.NAME, false, (a, context) -> new PercentileRanksAggregationBuilder(context, (List) a[0])); - ValuesSourceParserHelper.declareNumericFields(PARSER, true, false, false); + ValuesSourceParserHelper.declareAnyFields(PARSER, true, true); PARSER.declareDoubleArray(constructorArg(), VALUES_FIELD); PARSER.declareBoolean(PercentileRanksAggregationBuilder::keyed, PercentilesAggregationBuilder.KEYED_FIELD); @@ -240,7 +240,7 @@ public PercentilesMethod method() { } @Override - protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, ValuesSourceConfig config, + protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, ValuesSourceConfig config, 
AggregatorFactory parent, Builder subFactoriesBuilder) throws IOException { switch (method) { case TDIGEST: diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java index 5b1da34accce6..a2271fd700ae1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java @@ -31,7 +31,6 @@ import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.support.ValueType; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder.LeafOnly; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; @@ -44,7 +43,7 @@ import java.util.Objects; import java.util.function.Consumer; -public class PercentilesAggregationBuilder extends LeafOnly { +public class PercentilesAggregationBuilder extends LeafOnly { public static final String NAME = Percentiles.TYPE_NAME; private static final double[] DEFAULT_PERCENTS = new double[] { 1, 5, 25, 50, 75, 95, 99 }; @@ -59,7 +58,7 @@ private static class TDigestOptions { } private static final ObjectParser TDIGEST_OPTIONS_PARSER = - new ObjectParser<>(PercentilesMethod.TDIGEST.getParseField().getPreferredName(), TDigestOptions::new); + new ObjectParser<>(PercentilesMethod.TDIGEST.getParseField().getPreferredName(), TDigestOptions::new); static { TDIGEST_OPTIONS_PARSER.declareDouble((opts, compression) -> opts.compression = compression, COMPRESSION_FIELD); } @@ -69,21 +68,21 @@ private static class HDROptions { } private 
static final ObjectParser HDR_OPTIONS_PARSER = - new ObjectParser<>(PercentilesMethod.HDR.getParseField().getPreferredName(), HDROptions::new); + new ObjectParser<>(PercentilesMethod.HDR.getParseField().getPreferredName(), HDROptions::new); static { HDR_OPTIONS_PARSER.declareInt( - (opts, numberOfSigDigits) -> opts.numberOfSigDigits = numberOfSigDigits, - NUMBER_SIGNIFICANT_DIGITS_FIELD); + (opts, numberOfSigDigits) -> opts.numberOfSigDigits = numberOfSigDigits, + NUMBER_SIGNIFICANT_DIGITS_FIELD); } private static final ObjectParser PARSER; static { PARSER = new ObjectParser<>(PercentilesAggregationBuilder.NAME); - ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, false); + ValuesSourceParserHelper.declareAnyFields(PARSER, true, true); PARSER.declareDoubleArray( - (b, v) -> b.percentiles(v.stream().mapToDouble(Double::doubleValue).toArray()), - PERCENTS_FIELD); + (b, v) -> b.percentiles(v.stream().mapToDouble(Double::doubleValue).toArray()), + PERCENTS_FIELD); PARSER.declareBoolean(PercentilesAggregationBuilder::keyed, KEYED_FIELD); @@ -263,19 +262,19 @@ public PercentilesMethod method() { } @Override - protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, - ValuesSourceConfig config, - AggregatorFactory parent, - Builder subFactoriesBuilder) throws IOException { + protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, + ValuesSourceConfig config, + AggregatorFactory parent, + Builder subFactoriesBuilder) throws IOException { switch (method) { - case TDIGEST: - return new TDigestPercentilesAggregatorFactory(name, config, percents, compression, keyed, queryShardContext, parent, + case TDIGEST: + return new TDigestPercentilesAggregatorFactory(name, config, percents, compression, keyed, queryShardContext, parent, subFactoriesBuilder, metaData); - case HDR: - return new HDRPercentilesAggregatorFactory(name, config, percents, - numberOfSignificantValueDigits, keyed, queryShardContext, 
parent, subFactoriesBuilder, metaData); - default: - throw new IllegalStateException("Illegal method [" + method + "]"); + case HDR: + return new HDRPercentilesAggregatorFactory(name, config, percents, + numberOfSignificantValueDigits, keyed, queryShardContext, parent, subFactoriesBuilder, metaData); + default: + throw new IllegalStateException("Illegal method [" + method + "]"); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java index 69e385151eae3..f902360971c0b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java @@ -22,6 +22,7 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.internal.SearchContext; @@ -32,7 +33,7 @@ class TDigestPercentileRanksAggregator extends AbstractTDigestPercentilesAggregator { TDigestPercentileRanksAggregator(String name, - Numeric valuesSource, + ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] percents, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java index 5138ff2741680..6fe529cee3112 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java +++ 
b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java @@ -35,14 +35,14 @@ import java.util.Map; class TDigestPercentileRanksAggregatorFactory - extends ValuesSourceAggregatorFactory { + extends ValuesSourceAggregatorFactory { private final double[] percents; private final double compression; private final boolean keyed; TDigestPercentileRanksAggregatorFactory(String name, - ValuesSourceConfig config, + ValuesSourceConfig config, double[] percents, double compression, boolean keyed, @@ -66,7 +66,7 @@ protected Aggregator createUnmapped(SearchContext searchContext, } @Override - protected Aggregator doCreateInternal(Numeric valuesSource, + protected Aggregator doCreateInternal(ValuesSource valuesSource, SearchContext searchContext, Aggregator parent, boolean collectsFromSingleBucket, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregator.java index 81bbe15e82150..5140011808796 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregator.java @@ -22,7 +22,7 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; +import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -32,7 +32,7 @@ class TDigestPercentilesAggregator extends AbstractTDigestPercentilesAggregator { TDigestPercentilesAggregator(String name, - Numeric valuesSource, + ValuesSource valuesSource, SearchContext context, Aggregator 
parent, double[] percents, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java index 252a3b4ac3870..c9ba61331112c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java @@ -25,7 +25,6 @@ import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; @@ -35,13 +34,13 @@ import java.util.Map; class TDigestPercentilesAggregatorFactory - extends ValuesSourceAggregatorFactory { + extends ValuesSourceAggregatorFactory { private final double[] percents; private final double compression; private final boolean keyed; - TDigestPercentilesAggregatorFactory(String name, ValuesSourceConfig config, double[] percents, + TDigestPercentilesAggregatorFactory(String name, ValuesSourceConfig config, double[] percents, double compression, boolean keyed, QueryShardContext queryShardContext, AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder, Map metaData) throws IOException { super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData); @@ -52,22 +51,22 @@ class TDigestPercentilesAggregatorFactory @Override protected Aggregator createUnmapped(SearchContext searchContext, - Aggregator parent, - List pipelineAggregators, - Map metaData) throws IOException { + Aggregator 
parent, + List pipelineAggregators, + Map metaData) throws IOException { return new TDigestPercentilesAggregator(name, null, searchContext, parent, percents, compression, keyed, config.format(), - pipelineAggregators, metaData); + pipelineAggregators, metaData); } @Override - protected Aggregator doCreateInternal(Numeric valuesSource, - SearchContext searchContext, - Aggregator parent, - boolean collectsFromSingleBucket, - List pipelineAggregators, - Map metaData) throws IOException { + protected Aggregator doCreateInternal(ValuesSource valuesSource, + SearchContext searchContext, + Aggregator parent, + boolean collectsFromSingleBucket, + List pipelineAggregators, + Map metaData) throws IOException { return new TDigestPercentilesAggregator(name, valuesSource, searchContext, parent, percents, compression, keyed, config.format(), - pipelineAggregators, metaData); + pipelineAggregators, metaData); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java index 19a607a0f177c..3d92501a9071c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java @@ -33,8 +33,10 @@ import org.elasticsearch.index.fielddata.AbstractSortingNumericDocValues; import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData; import org.elasticsearch.index.fielddata.DocValueBits; +import org.elasticsearch.index.fielddata.HistogramValues; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexGeoPointFieldData; +import org.elasticsearch.index.fielddata.IndexHistogramFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData; import org.elasticsearch.index.fielddata.MultiGeoPointValues; @@ -552,4 +554,39 @@ 
public org.elasticsearch.index.fielddata.MultiGeoPointValues geoPointValues(Leaf } } + + public abstract static class Histogram extends ValuesSource { + + public abstract HistogramValues getHistogramValues(LeafReaderContext context); + + public static class Fielddata extends Histogram { + + protected final IndexHistogramFieldData indexFieldData; + + public Fielddata(IndexHistogramFieldData indexFieldData) { + this.indexFieldData = indexFieldData; + } + + @Override + public SortedBinaryDocValues bytesValues(LeafReaderContext context) { + return indexFieldData.load(context).getBytesValues(); + } + + @Override + public DocValueBits docsWithValue(LeafReaderContext context) throws IOException { + HistogramValues values = getHistogramValues(context); + return new DocValueBits() { + @Override + public boolean advanceExact(int doc) throws IOException { + return values.advanceExact(doc); + } + }; + } + + public HistogramValues getHistogramValues(LeafReaderContext context) { + return indexFieldData.load(context).getHistogramValues(); + } + } + } + } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java index d906260c75694..ac004a812ccd8 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexGeoPointFieldData; +import org.elasticsearch.index.fielddata.IndexHistogramFieldData; import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData; import org.elasticsearch.index.mapper.DateFieldMapper; @@ -118,6 +119,8 @@ public static ValuesSourceConfig resolve( 
config = new ValuesSourceConfig<>(ValuesSourceType.GEOPOINT); } else if (fieldType instanceof RangeFieldMapper.RangeFieldType) { config = new ValuesSourceConfig<>(ValuesSourceType.RANGE); + } else if (indexFieldData instanceof IndexHistogramFieldData) { + config = new ValuesSourceConfig<>(ValuesSourceType.HISTOGRAM); } else { if (valueType == null) { config = new ValuesSourceConfig<>(ValuesSourceType.BYTES); @@ -325,6 +328,9 @@ private VS originalValuesSource() { if (valueSourceType() == ValuesSourceType.RANGE) { return (VS) rangeField(); } + if (valueSourceType() == ValuesSourceType.HISTOGRAM) { + return (VS) histogramField(); + } // falling back to bytes values return (VS) bytesField(); } @@ -384,4 +390,12 @@ private ValuesSource rangeField() { RangeFieldMapper.RangeFieldType rangeFieldType = (RangeFieldMapper.RangeFieldType)fieldType; return new ValuesSource.Range(fieldContext().indexFieldData(), rangeFieldType.rangeType()); } + + private ValuesSource histogramField() { + if (!(fieldContext().indexFieldData() instanceof IndexHistogramFieldData)) { + throw new IllegalArgumentException("Expected histogram type on field [" + fieldContext().field() + + "], but got [" + fieldContext().fieldType().typeName() + "]"); + } + return new ValuesSource.Histogram.Fielddata((IndexHistogramFieldData) fieldContext().indexFieldData()); + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceType.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceType.java index 93398abe99e9a..4a7bf695661b5 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceType.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceType.java @@ -31,7 +31,8 @@ public enum ValuesSourceType implements Writeable { NUMERIC, BYTES, GEOPOINT, - RANGE; + RANGE, + HISTOGRAM; public static ValuesSourceType fromString(String name) { return 
valueOf(name.trim().toUpperCase(Locale.ROOT)); diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java index 446f47ae7a12b..6fdd015c74de4 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java @@ -7,10 +7,13 @@ import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.plugins.ActionPlugin; +import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.SearchPlugin; +import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper; import org.elasticsearch.xpack.core.XPackPlugin; import org.elasticsearch.xpack.core.action.XPackInfoFeatureAction; import org.elasticsearch.xpack.core.action.XPackUsageFeatureAction; @@ -22,12 +25,14 @@ import org.elasticsearch.xpack.analytics.cumulativecardinality.CumulativeCardinalityPipelineAggregator; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.concurrent.atomic.AtomicLong; import static java.util.Collections.singletonList; -public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugin { +public class AnalyticsPlugin extends Plugin implements SearchPlugin, ActionPlugin, MapperPlugin { // TODO this should probably become more structured once Analytics plugin has more than just one agg public static AtomicLong cumulativeCardUsage = new AtomicLong(0); @@ -52,4 +57,9 @@ public List getPipelineAggregations() { new ActionHandler<>(XPackInfoFeatureAction.ANALYTICS, AnalyticsInfoTransportAction.class), new 
ActionHandler<>(AnalyticsStatsAction.INSTANCE, TransportAnalyticsStatsAction.class)); } + + @Override + public Map getMappers() { + return Collections.singletonMap(HistogramFieldMapper.CONTENT_TYPE, new HistogramFieldMapper.TypeParser()); + } } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java new file mode 100644 index 0000000000000..0aa1cb82a5bf5 --- /dev/null +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -0,0 +1,456 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.analytics.mapper; + + +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.Explicit; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.ByteBufferStreamInput; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.index.IndexSettings; 
+import org.elasticsearch.index.fielddata.AtomicHistogramFieldData; +import org.elasticsearch.index.fielddata.FieldData; +import org.elasticsearch.index.fielddata.HistogramValue; +import org.elasticsearch.index.fielddata.HistogramValues; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldDataCache; +import org.elasticsearch.index.fielddata.IndexHistogramFieldData; +import org.elasticsearch.index.fielddata.ScriptDocValues; +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.index.mapper.ArrayValueMapperParser; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.QueryShardException; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.search.MultiValueMode; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.index.mapper.TypeParsers.parseField; + +/** + * Field Mapper for pre-aggregated histograms. 
+ * + */ +public class HistogramFieldMapper extends FieldMapper implements ArrayValueMapperParser { + public static final String CONTENT_TYPE = "histogram"; + + public static class Names { + public static final String IGNORE_MALFORMED = "ignore_malformed"; + } + + public static class Defaults { + public static final Explicit IGNORE_MALFORMED = new Explicit<>(false, false); + public static final HDRPercentilesFieldType FIELD_TYPE = new HDRPercentilesFieldType(); + + static { + FIELD_TYPE.setTokenized(false); + FIELD_TYPE.setHasDocValues(true); + FIELD_TYPE.setIndexOptions(IndexOptions.NONE); + FIELD_TYPE.freeze(); + } + } + + public static final ParseField COUNTS_FIELD = new ParseField("counts"); + public static final ParseField VALUES_FIELD = new ParseField("values"); + + public static class Builder extends FieldMapper.Builder { + protected Boolean ignoreMalformed; + + public Builder(String name) { + super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); + builder = this; + } + + public Builder ignoreMalformed(boolean ignoreMalformed) { + this.ignoreMalformed = ignoreMalformed; + return builder; + } + + protected Explicit ignoreMalformed(BuilderContext context) { + if (ignoreMalformed != null) { + return new Explicit<>(ignoreMalformed, true); + } + if (context.indexSettings() != null) { + return new Explicit<>(IGNORE_MALFORMED_SETTING.get(context.indexSettings()), false); + } + return HistogramFieldMapper.Defaults.IGNORE_MALFORMED; + } + + @Override + public Builder store(boolean store) { + if (store) { + throw new IllegalArgumentException("The [" + CONTENT_TYPE + "] field does not support " + + "stored fields"); + } + return super.store(false); + } + + @Override + public Builder index(boolean index) { + if (index) { + throw new IllegalArgumentException("The [" + CONTENT_TYPE + "] field does not support indexing"); + } + return super.store(false); + } + + @Override + public Builder indexOptions(IndexOptions indexOptions) { + if 
(indexOptions.equals(IndexOptions.NONE) == false) { + throw new IllegalArgumentException("The [" + CONTENT_TYPE + "] field does not support " + + "index options, got [index_options]=" + indexOptionToString(indexOptions)); + } + return super.indexOptions(indexOptions); + } + + public HistogramFieldMapper build(BuilderContext context, String simpleName, MappedFieldType fieldType, + MappedFieldType defaultFieldType, Settings indexSettings, + MultiFields multiFields, Explicit ignoreMalformed, CopyTo copyTo) { + setupFieldType(context); + return new HistogramFieldMapper(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, + ignoreMalformed, copyTo); + } + + @Override + public HistogramFieldMapper build(BuilderContext context) { + return build(context, name, fieldType, defaultFieldType, context.indexSettings(), + multiFieldsBuilder.build(this, context), ignoreMalformed(context), copyTo); + } + } + + public static class TypeParser implements Mapper.TypeParser { + @Override + public Mapper.Builder parse(String name, Map node, ParserContext parserContext) + throws MapperParsingException { + Builder builder = new HistogramFieldMapper.Builder(name); + parseField(builder, name, node, parserContext); + for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { + Map.Entry entry = iterator.next(); + String propName = entry.getKey(); + Object propNode = entry.getValue(); + if (propName.equals(Names.IGNORE_MALFORMED)) { + builder.ignoreMalformed(XContentMapValues.nodeBooleanValue(propNode, name + "." 
+ Names.IGNORE_MALFORMED)); + iterator.remove(); + } + } + return builder; + } + } + + protected Explicit ignoreMalformed; + + public HistogramFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, + Settings indexSettings, MultiFields multiFields, Explicit ignoreMalformed, CopyTo copyTo) { + super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo); + this.ignoreMalformed = ignoreMalformed; + } + + @Override + protected void doMerge(Mapper mergeWith) { + super.doMerge(mergeWith); + HistogramFieldMapper gpfmMergeWith = (HistogramFieldMapper) mergeWith; + if (gpfmMergeWith.ignoreMalformed.explicit()) { + this.ignoreMalformed = gpfmMergeWith.ignoreMalformed; + } + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + @Override + protected void parseCreateField(ParseContext context, List fields) throws IOException { + throw new UnsupportedOperationException("Parsing is implemented in parse(), this method should NEVER be called"); + } + + public static class HDRPercentilesFieldType extends MappedFieldType { + public HDRPercentilesFieldType() { + } + + HDRPercentilesFieldType(HDRPercentilesFieldType ref) { + super(ref); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public MappedFieldType clone() { + return new HDRPercentilesFieldType(this); + } + + @Override + public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { + failIfNoDocValues(); + return new IndexFieldData.Builder() { + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public IndexFieldData build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache, CircuitBreakerService breakerService, MapperService mapperService) { + + return new IndexHistogramFieldData(indexSettings.getIndex(), fieldType.name()) { + + @Override + public AtomicHistogramFieldData load(LeafReaderContext context) { + return new 
AtomicHistogramFieldData() { + @Override + public HistogramValues getHistogramValues() { + try { + final BinaryDocValues values = DocValues.getBinary(context.reader(), fieldName); + return new HistogramValues() { + @Override + public boolean advanceExact(int doc) throws IOException { + return values.advanceExact(doc); + } + + @Override + public HistogramValue histogram() { + try { + return getHistogramValue(values.binaryValue()); + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc value", e); + } + } + }; + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + + } + + @Override + public ScriptDocValues getScriptValues() { + return new ScriptDocValues.Strings(getBytesValues()); + } + + @Override + public SortedBinaryDocValues getBytesValues() { + try { + final BinaryDocValues values = DocValues.getBinary(context.reader(), fieldName); + return FieldData.singleton(values); + } catch (IOException e) { + throw new IllegalStateException("Cannot load doc values", e); + } + } + + @Override + public long ramBytesUsed() { + return 0; // Unknown + } + + @Override + public void close() { + + } + }; + } + + @Override + public AtomicHistogramFieldData loadDirect(LeafReaderContext context) throws Exception { + return load(context); + } + + @Override + public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) { + return null; + } + }; + } + + private HistogramValue getHistogramValue(final BytesRef bytesRef) throws IOException { + final ByteBufferStreamInput streamInput = new ByteBufferStreamInput(ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length)); + final int numValues = streamInput.readVInt(); + return new HistogramValue() { + double value; + int count; + int position; + boolean isExhausted; + + @Override + public boolean next() throws IOException { + if (position < numValues) { + position++; + value = streamInput.readDouble(); 
+ count = streamInput.readVInt(); + return true; + } + isExhausted = true; + return false; + } + + @Override + public double value() { + if (isExhausted) { + throw new IllegalArgumentException("histogram already exhausted"); + } + return value; + } + + @Override + public int count() { + if (isExhausted) { + throw new IllegalArgumentException("histogram already exhausted"); + } + return count; + } + }; + } + + }; + } + + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + throw new QueryShardException(context, "field " + name() + " of type [" + CONTENT_TYPE + "] has no doc values and cannot be searched"); + } + } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + throw new QueryShardException(context, "[" + CONTENT_TYPE + "] field do not support searching, use dedicated aggregations instead: [" + + name() + "]"); + } + } + + @Override + public void parse(ParseContext context) throws IOException { + context.path().add(simpleName()); + try { + List values = null; + List counts = null; + XContentParser.Token token = context.parser().currentToken(); + if (token != XContentParser.Token.START_OBJECT) { + throw new MapperParsingException("error parsing field [" + + name() + "], expected an [" + XContentParser.Token.START_OBJECT.name() + + "] but got [" + token.name() + "]"); + } + token = context.parser().nextToken(); + while (token != XContentParser.Token.END_OBJECT) { + if (token != XContentParser.Token.FIELD_NAME) { + throw new MapperParsingException("error parsing field [" + + name() + "], expected a field but got " + context.parser().currentName()); + } + String fieldName = context.parser().currentName(); + if (fieldName.equals(VALUES_FIELD.getPreferredName())) { + token = context.parser().nextToken(); + //should be an array + if (token != XContentParser.Token.START_ARRAY) { + throw new MapperParsingException("error parsing field [" + + 
name() + "], expected an [" + XContentParser.Token.START_ARRAY.name() + + "] but got [" + token.name() + "]"); + } + values = new ArrayList<>(); + token = context.parser().nextToken(); + while (token != XContentParser.Token.END_ARRAY) { + values.add(context.parser().doubleValue()); + token = context.parser().nextToken(); + } + } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) { + token = context.parser().nextToken(); + //should be an array + if (token != XContentParser.Token.START_ARRAY) { + throw new MapperParsingException("error parsing field [" + + name() + "], expected an [" + XContentParser.Token.START_ARRAY.name() + + "] but got [" + token.name() + "]"); + } + counts = new ArrayList<>(); + token = context.parser().nextToken(); + while (token != XContentParser.Token.END_ARRAY) { + counts.add(context.parser().intValue()); + token = context.parser().nextToken(); + } + } else { + throw new MapperParsingException("error parsing field [" + + name() + "], with unknown parameter [" + fieldName + "]"); + } + token = context.parser().nextToken(); + } + if (values == null) { + throw new MapperParsingException("error parsing field [" + + name() + "], expected field called [" + VALUES_FIELD.getPreferredName() + "]"); + } + if (counts == null) { + throw new MapperParsingException("error parsing field [" + + name() + "], expected field called [" + COUNTS_FIELD.getPreferredName() + "]"); + } + if (values.size() != counts.size()) { + throw new MapperParsingException("error parsing field [" + + name() + "], expected same length from [" + VALUES_FIELD.getPreferredName() +"] and " + + "[" + COUNTS_FIELD.getPreferredName() +"] but got [" + values.size() + " != " + counts.size() +"]"); + } + if (values.size() == 0) { + throw new MapperParsingException("error parsing field [" + + name() + "], arrays for values and counts cannot be empty"); + } + if (fieldType().hasDocValues()) { + BytesStreamOutput streamOutput = new BytesStreamOutput(); + 
streamOutput.writeVInt(values.size()); + for (int i = 0; i < values.size(); i++) { + streamOutput.writeDouble(values.get(i)); + if (counts.get(i) < 0) { + throw new MapperParsingException("error parsing field [" + + name() + "], ["+ COUNTS_FIELD + "] elements must be >= 0 but got " + counts.get(i)); + } + streamOutput.writeVInt(counts.get(i)); + } + + Field field = new BinaryDocValuesField(simpleName(), streamOutput.bytes().toBytesRef()); + streamOutput.close(); + context.doc().add(field); + } + + } catch (Exception ex) { + if (ignoreMalformed.value()) { + return; + } + throw new MapperParsingException("failed to parse field [{}] of type [{}]", ex, fieldType().name(), fieldType().typeName()); + } + + context.path().remove(); + } + + @Override + protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { + super.doXContentBody(builder, includeDefaults, params); + if (includeDefaults || ignoreMalformed.explicit()) { + builder.field(Names.IGNORE_MALFORMED, ignoreMalformed.value()); + } + } +} diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java new file mode 100644 index 0000000000000..fe1bd57d1a1f5 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+package org.elasticsearch.xpack.analytics.mapper;
+
+import org.HdrHistogram.DoubleHistogram;
+import org.HdrHistogram.DoubleHistogramIterationValue;
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentileRanks;
+import org.elasticsearch.search.aggregations.metrics.Percentile;
+import org.elasticsearch.search.aggregations.metrics.PercentileRanks;
+import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder;
+import org.elasticsearch.search.aggregations.metrics.PercentilesMethod;
+import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
+import org.hamcrest.Matchers;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+
+/**
+ * Runs the HDR percentile-ranks aggregation against a histogram field whose doc value
+ * was pre-aggregated with a {@link DoubleHistogram}.
+ */
+public class HDRPreAggregatedPercentileRanksAggregatorTests extends AggregatorTestCase {
+
+    /**
+     * Records {@code values} in a {@link DoubleHistogram} and serializes its recorded buckets
+     * into a binary doc value: a vint bucket count, then for every bucket a double value
+     * followed by a vint count.
+     *
+     * @param fieldName name of the doc-values field to create
+     * @param values    raw samples to pre-aggregate
+     * @return the binary doc-values field holding the serialized histogram
+     * @throws IOException if writing to the in-memory stream fails
+     */
+    private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException {
+        DoubleHistogram histogram = new DoubleHistogram(3);//default
+        for (double value : values) {
+            histogram.recordValue(value);
+        }
+        BytesStreamOutput streamOutput = new BytesStreamOutput();
+        DoubleHistogram.RecordedValues recordedValues = histogram.recordedValues();
+        // The list is only used to learn the number of buckets; the buckets themselves are
+        // read from a second iteration below.
+        // NOTE(review): HdrHistogram iterators appear to hand back one reused
+        // iteration-value object, so the list elements must not be read -- confirm.
+        List<DoubleHistogramIterationValue> histogramValues =
+            StreamSupport.stream(recordedValues.spliterator(), false).collect(Collectors.toList());
+        streamOutput.writeVInt(histogramValues.size());
+        Iterator<DoubleHistogramIterationValue> iterator = recordedValues.iterator();
+        while (iterator.hasNext()) {
+            DoubleHistogramIterationValue value = iterator.next();
+            double d = value.getValueIteratedTo();
+            streamOutput.writeDouble(d);
+            long count = value.getCountAtValueIteratedTo();
+            streamOutput.writeVInt(Math.toIntExact(count));
+
+        }
+        return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
+    }
+
+    /**
+     * Indexes one pre-aggregated document built from {3, 0.2, 10} and checks the ranks of
+     * 0.1 (below every sample), 0.5 (in between) and 12 (above every sample).
+     */
+    public void testSimple() throws IOException {
+        try (Directory dir = newDirectory();
+             RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
+            Document doc = new Document();
+            doc.add(getDocValue("field", new double[] {3, 0.2, 10}));
+            w.addDocument(doc);
+
+            PercentileRanksAggregationBuilder aggBuilder = new PercentileRanksAggregationBuilder("my_agg", new double[]{0.1, 0.5, 12})
+                .field("field")
+                .method(PercentilesMethod.HDR);
+            MappedFieldType fieldType = new HistogramFieldMapper.Builder("field").fieldType();
+            fieldType.setName("field");
+            try (IndexReader reader = w.getReader()) {
+                IndexSearcher searcher = new IndexSearcher(reader);
+                PercentileRanks ranks = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
+                Iterator<Percentile> rankIterator = ranks.iterator();
+                Percentile rank = rankIterator.next();
+                assertEquals(0.1, rank.getValue(), 0d);
+                assertThat(rank.getPercent(), Matchers.equalTo(0d));
+                rank = rankIterator.next();
+                assertEquals(0.5, rank.getValue(), 0d);
+                assertThat(rank.getPercent(), Matchers.greaterThan(0d));
+                assertThat(rank.getPercent(), Matchers.lessThan(100d));
+                rank = rankIterator.next();
+                assertEquals(12, rank.getValue(), 0d);
+                assertThat(rank.getPercent(), Matchers.equalTo(100d));
+                assertFalse(rankIterator.hasNext());
+                assertTrue(AggregationInspectionHelper.hasValue((InternalHDRPercentileRanks)ranks));
+            }
+        }
+    }
+
+}
diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java
b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java
new file mode 100644
index 0000000000000..b5988dd973ff8
--- /dev/null
+++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.analytics.mapper;
+
+import org.HdrHistogram.DoubleHistogram;
+import org.HdrHistogram.DoubleHistogramIterationValue;
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+import org.elasticsearch.common.CheckedConsumer;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.search.aggregations.Aggregator;
+import org.elasticsearch.search.aggregations.AggregatorTestCase;
+import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentiles;
+import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder;
+import org.elasticsearch.search.aggregations.metrics.PercentilesMethod;
+import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import static java.util.Collections.singleton;
+
+/**
+ * Runs the HDR percentiles aggregation against histogram doc values that were
+ * pre-aggregated with a {@link DoubleHistogram}.
+ */
+public class HDRPreAggregatedPercentilesAggregatorTests extends AggregatorTestCase {
+
+    /**
+     * Records {@code values} in a {@link DoubleHistogram} and serializes its recorded buckets
+     * into a binary doc value: a vint bucket count, then for every bucket a double value
+     * followed by a vint count.
+     *
+     * @param fieldName name of the doc-values field to create
+     * @param values    raw samples to pre-aggregate
+     * @return the binary doc-values field holding the serialized histogram
+     * @throws IOException if writing to the in-memory stream fails
+     */
+    private BinaryDocValuesField getDocValue(String fieldName, int[] values) throws IOException {
+        DoubleHistogram histogram = new DoubleHistogram(3);//default
+        for (int value : values) {
+            histogram.recordValue(value);
+        }
+        BytesStreamOutput streamOutput = new BytesStreamOutput();
+        DoubleHistogram.RecordedValues recordedValues = histogram.recordedValues();
+        // The list is only used to learn the number of buckets; the buckets themselves are
+        // read from a second iteration below.
+        // NOTE(review): HdrHistogram iterators appear to hand back one reused
+        // iteration-value object, so the list elements must not be read -- confirm.
+        List<DoubleHistogramIterationValue> histogramValues =
+            StreamSupport.stream(recordedValues.spliterator(), false).collect(Collectors.toList());
+        streamOutput.writeVInt(histogramValues.size());
+        Iterator<DoubleHistogramIterationValue> iterator = recordedValues.iterator();
+        while (iterator.hasNext()) {
+            DoubleHistogramIterationValue value = iterator.next();
+            double d = value.getValueIteratedTo();
+            streamOutput.writeDouble(d);
+            long count = value.getCountAtValueIteratedTo();
+            streamOutput.writeVInt(Math.toIntExact(count));
+
+        }
+        return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef());
+    }
+
+    /** A document that only has a differently named field yields an aggregation without a value. */
+    public void testNoMatchingField() throws IOException {
+        testCase(new MatchAllDocsQuery(), iw -> {
+            iw.addDocument(singleton(getDocValue("wrong_number", new int[]{7, 1})));
+        }, hdr -> assertFalse(AggregationInspectionHelper.hasValue(hdr)));
+    }
+
+    /** One pre-aggregated document built from {60, 40, 20, 10}. */
+    public void testSomeMatchesBinaryDocValues() throws IOException {
+        testCase(new DocValuesFieldExistsQuery("number"), iw -> {
+            iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10})));
+        }, this::assertQuartiles);
+    }
+
+    /** Four identical pre-aggregated documents; the percentiles are expected to stay the same. */
+    public void testSomeMatchesMultiBinaryDocValues() throws IOException {
+        testCase(new DocValuesFieldExistsQuery("number"), iw -> {
+            for (int i = 0; i < 4; i++) {
+                iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10})));
+            }
+        }, this::assertQuartiles);
+    }
+
+    /** Checks the 25th/50th/75th/99th percentiles expected for samples {10, 20, 40, 60}. */
+    private void assertQuartiles(InternalHDRPercentiles hdr) {
+        double approximation = 0.05d;
+        assertEquals(10.0d, hdr.percentile(25), approximation);
+        assertEquals(20.0d, hdr.percentile(50), approximation);
+        assertEquals(40.0d, hdr.percentile(75), approximation);
+        assertEquals(60.0d, hdr.percentile(99), approximation);
+        assertTrue(AggregationInspectionHelper.hasValue(hdr));
+    }
+
+    /**
+     * Builds an index with {@code buildIndex}, runs an HDR percentiles aggregation on the
+     * histogram field {@code number} for the documents matching {@code query}, and passes
+     * the result to {@code verify}.
+     */
+    private void testCase(Query query, CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
+                          Consumer<InternalHDRPercentiles> verify) throws IOException {
+        try (Directory directory = newDirectory()) {
+            try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) {
+                buildIndex.accept(indexWriter);
+            }
+
+            try (IndexReader indexReader = DirectoryReader.open(directory)) {
+                IndexSearcher indexSearcher = newSearcher(indexReader, true, true);
+
+                PercentilesAggregationBuilder builder =
+                        new PercentilesAggregationBuilder("test").field("number").method(PercentilesMethod.HDR);
+
+                MappedFieldType fieldType = new HistogramFieldMapper.Builder("number").fieldType();
+                fieldType.setName("number");
+                Aggregator aggregator = createAggregator(builder, indexSearcher, fieldType);
+                aggregator.preCollection();
+                indexSearcher.search(query, aggregator);
+                aggregator.postCollection();
+                verify.accept((InternalHDRPercentiles) aggregator.buildAggregation(0L));
+
+            }
+        }
+    }
+}
diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java
b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java new file mode 100644 index 0000000000000..e63b17abd8831 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java @@ -0,0 +1,239 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.analytics.mapper; + + +import com.tdunning.math.stats.Centroid; +import org.HdrHistogram.DoubleHistogram; +import org.HdrHistogram.DoubleHistogramIterationValue; +import org.apache.lucene.util.TestUtil; +import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; + +import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; +import org.elasticsearch.action.bulk.BulkRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.plugins.Plugin; + +import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentiles; +import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentiles; +import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.PercentilesMethod; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.xpack.analytics.AnalyticsPlugin; +import org.elasticsearch.xpack.core.XPackPlugin; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; 
+
+
+/**
+ * Compares percentiles computed from raw {@code double} values with percentiles computed
+ * from the same values pre-aggregated into {@code histogram} documents.
+ */
+public class HistogramAggregationTests extends ESSingleNodeTestCase {
+
+    /**
+     * Pre-aggregates with a {@link DoubleHistogram} and expects the HDR percentiles of the
+     * raw index, the pre-aggregated index and both together to be exactly equal.
+     */
+    public void testHDRHistogram() throws Exception {
+        createIndexWithDataField("raw", "double");
+        createIndexWithDataField("pre_agg", "histogram");
+
+        int numberOfSignificantValueDigits = TestUtil.nextInt(random(), 1, 5);
+        DoubleHistogram histogram = new DoubleHistogram(numberOfSignificantValueDigits);
+        BulkRequest bulkRequest = new BulkRequest();
+
+        int numDocs = 100000;
+        int frq = 10000;
+
+        for (int i = 0; i < numDocs; i++) {
+            double value = random().nextDouble();
+            bulkRequest.add(new IndexRequest("raw").source(rawDoc(value)));
+            histogram.recordValue(value);
+            if ((i + 1) % frq == 0) {
+                executeBulk(bulkRequest);
+                bulkRequest = new BulkRequest();
+                List<Double> values = new ArrayList<>();
+                List<Long> counts = new ArrayList<>();
+                Iterator<DoubleHistogramIterationValue> iterator = histogram.recordedValues().iterator();
+                while (iterator.hasNext()) {
+                    DoubleHistogramIterationValue histValue = iterator.next();
+                    values.add(histValue.getValueIteratedTo());
+                    counts.add(histValue.getCountAtValueIteratedTo());
+                }
+                indexPreAggregated(values, counts);
+                histogram.reset();
+            }
+        }
+        refreshAndAssertDocCounts(numDocs, numDocs / frq);
+
+        PercentilesAggregationBuilder builder = AggregationBuilders.percentiles("agg")
+            .field("data")
+            .method(PercentilesMethod.HDR)
+            .numberOfSignificantValueDigits(numberOfSignificantValueDigits)
+            .percentiles(10);
+
+        SearchResponse responseRaw = client().prepareSearch("raw").addAggregation(builder).get();
+        SearchResponse responsePreAgg = client().prepareSearch("pre_agg").addAggregation(builder).get();
+        SearchResponse responseBoth = client().prepareSearch("pre_agg", "raw").addAggregation(builder).get();
+
+        InternalHDRPercentiles percentilesRaw = responseRaw.getAggregations().get("agg");
+        InternalHDRPercentiles percentilesPreAgg = responsePreAgg.getAggregations().get("agg");
+        InternalHDRPercentiles percentilesBoth = responseBoth.getAggregations().get("agg");
+        for (int i = 1; i < 100; i++) {
+            assertEquals(percentilesRaw.percentile(i), percentilesPreAgg.percentile(i), 0.0);
+            assertEquals(percentilesRaw.percentile(i), percentilesBoth.percentile(i), 0.0);
+        }
+    }
+
+    /**
+     * Pre-aggregates with a {@link TDigestState} and expects the t-digest percentiles of the
+     * raw index, the pre-aggregated index and both together to agree within 1e-2.
+     */
+    public void testTDigestHistogram() throws Exception {
+        createIndexWithDataField("raw", "double");
+        createIndexWithDataField("pre_agg", "histogram");
+
+        int compression = TestUtil.nextInt(random(), 25, 300);
+        TDigestState histogram = new TDigestState(compression);
+        BulkRequest bulkRequest = new BulkRequest();
+
+        int numDocs = 100000;
+        int frq = 10000;
+
+        for (int i = 0; i < numDocs; i++) {
+            double value = random().nextDouble();
+            bulkRequest.add(new IndexRequest("raw").source(rawDoc(value)));
+            histogram.add(value);
+            if ((i + 1) % frq == 0) {
+                executeBulk(bulkRequest);
+                bulkRequest = new BulkRequest();
+                List<Double> values = new ArrayList<>();
+                List<Long> counts = new ArrayList<>();
+                Collection<Centroid> centroids = histogram.centroids();
+                for (Centroid centroid : centroids) {
+                    values.add(centroid.mean());
+                    counts.add((long) centroid.count());
+                }
+                indexPreAggregated(values, counts);
+                histogram = new TDigestState(compression);
+            }
+        }
+        refreshAndAssertDocCounts(numDocs, numDocs / frq);
+
+        // 50, not 500: a percentile has to lie within [0, 100].
+        PercentilesAggregationBuilder builder = AggregationBuilders.percentiles("agg")
+            .field("data")
+            .method(PercentilesMethod.TDIGEST)
+            .compression(compression)
+            .percentiles(10, 25, 50, 75);
+
+        SearchResponse responseRaw = client().prepareSearch("raw").addAggregation(builder).get();
+        SearchResponse responsePreAgg = client().prepareSearch("pre_agg").addAggregation(builder).get();
+        SearchResponse responseBoth = client().prepareSearch("raw", "pre_agg").addAggregation(builder).get();
+
+        InternalTDigestPercentiles percentilesRaw = responseRaw.getAggregations().get("agg");
+        InternalTDigestPercentiles percentilesPreAgg = responsePreAgg.getAggregations().get("agg");
+        InternalTDigestPercentiles percentilesBoth = responseBoth.getAggregations().get("agg");
+        for (int i = 1; i < 100; i++) {
+            assertEquals(percentilesRaw.percentile(i), percentilesPreAgg.percentile(i), 1e-2);
+            assertEquals(percentilesRaw.percentile(i), percentilesBoth.percentile(i), 1e-2);
+        }
+    }
+
+    /** Creates {@code index} and maps its {@code data} field to the given field {@code type}. */
+    private void createIndexWithDataField(String index, String type) throws Exception {
+        XContentBuilder mapping = XContentFactory.jsonBuilder()
+            .startObject()
+              .startObject("_doc")
+                .startObject("properties")
+                  .startObject("data")
+                     .field("type", type)
+                  .endObject()
+                .endObject()
+              .endObject()
+            .endObject();
+        createIndex(index);
+        PutMappingRequest request = new PutMappingRequest(index).type("_doc").source(mapping);
+        client().admin().indices().putMapping(request).actionGet();
+    }
+
+    /** Builds the source of a raw document: {@code {"data": value}}. */
+    private static XContentBuilder rawDoc(double value) throws Exception {
+        return XContentFactory.jsonBuilder()
+            .startObject()
+              .field("data", value)
+            .endObject();
+    }
+
+    /**
+     * Executes the bulk request and waits for it to finish, so that the later refresh and
+     * hit-count assertions cannot race against a bulk that is still in flight.
+     */
+    private void executeBulk(BulkRequest bulkRequest) {
+        assertFalse(client().bulk(bulkRequest).actionGet().hasFailures());
+    }
+
+    /** Indexes one pre-aggregated histogram document into the {@code pre_agg} index. */
+    private void indexPreAggregated(List<Double> values, List<Long> counts) throws Exception {
+        XContentBuilder preAggDoc = XContentFactory.jsonBuilder()
+            .startObject()
+              .startObject("data")
+                .field("values", values.toArray(new Double[values.size()]))
+                .field("counts", counts.toArray(new Long[counts.size()]))
+              .endObject()
+            .endObject();
+        client().prepareIndex("pre_agg").setSource(preAggDoc).get();
+    }
+
+    /** Refreshes both indices and checks how many documents each of them holds. */
+    private void refreshAndAssertDocCounts(int expectedRaw, int expectedPreAgg) throws Exception {
+        client().admin().indices().refresh(new RefreshRequest("raw", "pre_agg")).get();
+
+        SearchResponse response = client().prepareSearch("raw").setTrackTotalHits(true).get();
+        assertEquals(expectedRaw, response.getHits().getTotalHits().value);
+
+        response = client().prepareSearch("pre_agg").get();
+        assertEquals(expectedPreAgg, response.getHits().getTotalHits().value);
+    }
+
+    /** Adds the analytics and x-pack plugins to the test node. */
+    @Override
+    protected Collection<Class<? extends Plugin>> getPlugins() {
+        List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins());
+        plugins.add(AnalyticsPlugin.class);
+        plugins.add(XPackPlugin.class);
+        return plugins;
+    }
+
+}
diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java
new file mode 100644
index 0000000000000..20cc639d32d1f
--- /dev/null
+++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java
@@ -0,0 +1,380 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V.
under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.analytics.mapper;
+
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.compress.CompressedXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.index.mapper.DocumentMapper;
+import org.elasticsearch.index.mapper.DocumentMapperParser;
+import org.elasticsearch.index.mapper.MapperParsingException;
+import org.elasticsearch.index.mapper.ParsedDocument;
+import org.elasticsearch.index.mapper.SourceToParse;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.test.ESSingleNodeTestCase;
+import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
+import org.elasticsearch.xpack.core.XPackPlugin;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.hamcrest.Matchers.nullValue;
+
+
+/**
+ * Mapping and document-parsing tests for the {@code histogram} field type. Every test maps
+ * a field called {@code pre_aggregated} in an index called {@code test}.
+ */
+public class HistogramFieldMapperTests extends ESSingleNodeTestCase {
+
+    public void testParseValue() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        ParsedDocument doc = defaultMapper.parse(source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated").startObject()
+            .field("values", new int[] {0, 0})
+            .field("counts", new long[] {0, 0})
+            .endObject()
+            .endObject()));
+
+        assertThat(doc.rootDoc().getField("pre_aggregated"), notNullValue());
+    }
+
+    public void testParseArrayValue() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().startArray("pre_aggregated")
+            .startObject()
+            .field("counts", new int[] {2, 2, 3})
+            .field("values", new double[] {2, 2, 3})
+            .endObject()
+            .startObject()
+            .field("counts", new int[] {2, 2, 3})
+            .field("values", new double[] {2, 2, 3})
+            .endObject().endArray()
+            .endObject());
+
+        assertParseFailure(defaultMapper, source, "expected an [START_OBJECT] but got [START_ARRAY]");
+    }
+
+    public void testEmptyArrays() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated").startObject()
+            .field("values", new double[] {})
+            .field("counts", new int[] {})
+            .endObject()
+            .endObject());
+
+        assertParseFailure(defaultMapper, source, "arrays for values and counts cannot be empty");
+    }
+
+    public void testMissingFieldCounts() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated").startObject()
+            .field("values", new double[] {2, 2})
+            .endObject()
+            .endObject());
+
+        assertParseFailure(defaultMapper, source, "expected field called [counts]");
+    }
+
+    public void testMissingFieldValues() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated").startObject()
+            .field("counts", new int[] {2, 2})
+            .endObject()
+            .endObject());
+
+        assertParseFailure(defaultMapper, source, "expected field called [values]");
+    }
+
+    public void testUnknownField() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated").startObject()
+            .field("counts", new int[] {2, 2})
+            .field("values", new double[] {2, 2})
+            .field("unknown", new double[] {2, 2})
+            .endObject()
+            .endObject());
+
+        assertParseFailure(defaultMapper, source, "with unknown parameter [unknown]");
+    }
+
+    public void testFieldArraysDifferentSize() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated").startObject()
+            .field("counts", new int[] {2, 2})
+            .field("values", new double[] {2, 2, 3})
+            .endObject()
+            .endObject());
+
+        assertParseFailure(defaultMapper, source, "expected same length from [values] and [counts] but got [3 != 2]");
+    }
+
+    public void testFieldCountsNotArray() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated").startObject()
+            .field("counts", "bah")
+            .field("values", new double[] {2, 2, 3})
+            .endObject()
+            .endObject());
+
+        assertParseFailure(defaultMapper, source, "expected an [START_ARRAY] but got [VALUE_STRING]");
+    }
+
+    public void testFieldValuesNotArray() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated").startObject()
+            .field("counts", new int[] {2, 2, 3})
+            .field("values", "bah")
+            .endObject()
+            .endObject());
+
+        assertParseFailure(defaultMapper, source, "expected an [START_ARRAY] but got [VALUE_STRING]");
+    }
+
+    public void testCountIsLong() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated").startObject()
+            .field("counts", new long[] {2, 2, Long.MAX_VALUE})
+            .field("values", new double[] {2 ,2 ,3})
+            .endObject()
+            .endObject());
+
+        assertParseFailure(defaultMapper, source, " out of range of int");
+    }
+
+    public void testFieldNotObject() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated", "bah")
+            .endObject());
+
+        assertParseFailure(defaultMapper, source, "expected an [START_OBJECT] but got [VALUE_STRING]");
+    }
+
+    public void testNegativeCount() throws Exception {
+        DocumentMapper defaultMapper = createDefaultMapper();
+
+        SourceToParse source = source(XContentFactory.jsonBuilder()
+            .startObject().startObject("pre_aggregated")
+            .field("counts", new int[] {2, 2, -3})
+            .field("values", new double[] {2, 2, 3})
+            .endObject().endObject());
+
+        assertParseFailure(defaultMapper, source, "[counts] elements must be >= 0 but got -3");
+    }
+
+    public void testSetStoredField() throws Exception {
+        ensureGreen();
+        String mapping = closeMapping(openHistogramMapping().field("store", true));
+
+        DocumentMapperParser documentMapperParser = createIndex("test").mapperService().documentMapperParser();
+
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () ->
+            documentMapperParser.parse("_doc", new CompressedXContent(mapping)));
+        assertThat(e.getMessage(), containsString("The [histogram] field does not support stored fields"));
+
+        String mapping2 = closeMapping(openHistogramMapping().field("store", false));
+        DocumentMapper defaultMapper = documentMapperParser.parse("_doc", new CompressedXContent(mapping2));
+        assertNotNull(defaultMapper);
+    }
+
+    public void testSetIndexField() throws Exception {
+        ensureGreen();
+        final String mapping = closeMapping(openHistogramMapping().field("index", true));
+
+        DocumentMapperParser documentMapperParser = createIndex("test").mapperService().documentMapperParser();
+
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () ->
+            documentMapperParser.parse("_doc", new CompressedXContent(mapping)));
+        assertThat(e.getMessage(), containsString("The [histogram] field does not support indexing"));
+
+        final String mapping2 = closeMapping(openHistogramMapping().field("index", false));
+
+        DocumentMapper defaultMapper = documentMapperParser.parse("_doc", new CompressedXContent(mapping2));
+        assertNotNull(defaultMapper);
+    }
+
+    public void testSetDocValuesField() throws Exception {
+        ensureGreen();
+        final String mapping = closeMapping(openHistogramMapping().field("doc_values", false));
+
+        DocumentMapperParser documentMapperParser = createIndex("test").mapperService().documentMapperParser();
+
+        DocumentMapper defaultMapper = documentMapperParser.parse("_doc", new CompressedXContent(mapping));
+        assertNotNull(defaultMapper);
+
+        // Without doc values nothing is added to the Lucene document.
+        ParsedDocument doc = defaultMapper.parse(zeroHistogramSource());
+        assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue());
+
+        final String mapping2 = closeMapping(openHistogramMapping().field("doc_values", true));
+
+        defaultMapper = documentMapperParser.parse("_doc", new CompressedXContent(mapping2));
+        assertNotNull(defaultMapper);
+
+        doc = defaultMapper.parse(zeroHistogramSource());
+        assertThat(doc.rootDoc().getField("pre_aggregated"), notNullValue());
+    }
+
+    /**
+     * Starts a {@code _doc} mapping with a {@code pre_aggregated} field of type
+     * {@code histogram}. The field object is left open so that callers can add parameters.
+     */
+    private static XContentBuilder openHistogramMapping() throws Exception {
+        return XContentFactory.jsonBuilder().startObject().startObject("_doc")
+            .startObject("properties").startObject("pre_aggregated").field("type", "histogram");
+    }
+
+    /** Closes the four objects opened by {@link #openHistogramMapping()} and renders the mapping. */
+    private static String closeMapping(XContentBuilder xContentBuilder) throws Exception {
+        return Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject());
+    }
+
+    /** Creates the {@code test} index and parses the default histogram mapping for it. */
+    private DocumentMapper createDefaultMapper() throws Exception {
+        ensureGreen();
+        String mapping = closeMapping(openHistogramMapping());
+        return createIndex("test").mapperService().documentMapperParser()
+            .parse("_doc", new CompressedXContent(mapping));
+    }
+
+    /** Wraps a JSON document as the source of document {@code 1} of index {@code test}. */
+    private static SourceToParse source(XContentBuilder document) {
+        return new SourceToParse("test", "1", BytesReference.bytes(document), XContentType.JSON);
+    }
+
+    /** A valid histogram document with two buckets whose values and counts are all zero. */
+    private static SourceToParse zeroHistogramSource() throws Exception {
+        return source(XContentFactory.jsonBuilder()
+            .startObject().field("pre_aggregated").startObject()
+            .field("values", new double[] {0, 0})
+            .field("counts", new int[] {0, 0})
+            .endObject()
+            .endObject());
+    }
+
+    /**
+     * Expects parsing {@code source} to fail with a {@link MapperParsingException} whose
+     * cause message contains {@code expectedMessage}.
+     */
+    private static void assertParseFailure(DocumentMapper mapper, SourceToParse source, String expectedMessage) {
+        Exception e = expectThrows(MapperParsingException.class, () -> mapper.parse(source));
+        assertThat(e.getCause().getMessage(), containsString(expectedMessage));
+    }
+
+    /** Adds the analytics and x-pack plugins to the test node. */
+    @Override
+    protected Collection<Class<? extends Plugin>> getPlugins() {
+        List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins());
+        plugins.add(AnalyticsPlugin.class);
+        plugins.add(XPackPlugin.class);
+        return plugins;
+    }
+
+}
diff --git
a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java new file mode 100644 index 0000000000000..e3e1aed509058 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java @@ -0,0 +1,89 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.analytics.mapper; + +import com.tdunning.math.stats.Centroid; +import com.tdunning.math.stats.TDigest; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentileRanks; +import org.elasticsearch.search.aggregations.metrics.Percentile; +import org.elasticsearch.search.aggregations.metrics.PercentileRanks; +import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.PercentilesMethod; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import org.hamcrest.Matchers; + +import 
java.io.IOException; +import java.util.Collection; +import java.util.Iterator; + + +public class TDigestPreAggregatedPercentileRanksAggregatorTests extends AggregatorTestCase { + + private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException { + TDigest histogram = new TDigestState(100.0); //default + for (double value : values) { + histogram.add(value); + } + BytesStreamOutput streamOutput = new BytesStreamOutput(); + histogram.compress(); + Collection centroids = histogram.centroids(); + streamOutput.writeVInt(centroids.size()); + Iterator iterator = centroids.iterator(); + while ( iterator.hasNext()) { + Centroid centroid = iterator.next(); + streamOutput.writeDouble(centroid.mean()); + streamOutput.writeVInt(centroid.count()); + } + return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); + } + + public void testSimple() throws IOException { + try (Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir)) { + Document doc = new Document(); + doc.add(getDocValue("field", new double[] {3, 0.2, 10})); + w.addDocument(doc); + + PercentileRanksAggregationBuilder aggBuilder = new PercentileRanksAggregationBuilder("my_agg", new double[] {0.1, 0.5, 12}) + .field("field") + .method(PercentilesMethod.TDIGEST); + MappedFieldType fieldType = new HistogramFieldMapper.Builder("number").fieldType(); + fieldType.setName("field"); + try (IndexReader reader = w.getReader()) { + IndexSearcher searcher = new IndexSearcher(reader); + PercentileRanks ranks = search(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType); + Iterator rankIterator = ranks.iterator(); + Percentile rank = rankIterator.next(); + assertEquals(0.1, rank.getValue(), 0d); + // TODO: Fix T-Digest: this assertion should pass but we currently get ~15 + // https://github.com/elastic/elasticsearch/issues/14851 + // assertThat(rank.getPercent(), Matchers.equalTo(0d)); + rank = rankIterator.next(); + assertEquals(0.5, 
rank.getValue(), 0d); + assertThat(rank.getPercent(), Matchers.greaterThan(0d)); + assertThat(rank.getPercent(), Matchers.lessThan(100d)); + rank = rankIterator.next(); + assertEquals(12, rank.getValue(), 0d); + // TODO: Fix T-Digest: this assertion should pass but we currently get ~59 + // https://github.com/elastic/elasticsearch/issues/14851 + // assertThat(rank.getPercent(), Matchers.equalTo(100d)); + assertFalse(rankIterator.hasNext()); + assertTrue(AggregationInspectionHelper.hasValue(((InternalTDigestPercentileRanks)ranks))); + } + } + } +} diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java new file mode 100644 index 0000000000000..a2f1b653f82c5 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java @@ -0,0 +1,121 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.analytics.mapper; + +import com.tdunning.math.stats.Centroid; +import com.tdunning.math.stats.TDigest; +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.CheckedConsumer; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentiles; +import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.PercentilesMethod; +import org.elasticsearch.search.aggregations.metrics.TDigestState; +import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.function.Consumer;; + +import static java.util.Collections.singleton; + +public class TDigestPreAggregatedPercentilesAggregatorTests extends AggregatorTestCase { + + private BinaryDocValuesField getDocValue(String fieldName, int[] values) throws IOException { + TDigest histogram = new TDigestState(100.0); //default + for (int value : values) { + histogram.add(value); + } + BytesStreamOutput streamOutput = new BytesStreamOutput(); + histogram.compress(); + Collection centroids = histogram.centroids(); + streamOutput.writeVInt(centroids.size()); + Iterator iterator = centroids.iterator(); + while ( iterator.hasNext()) 
{ + Centroid centroid = iterator.next(); + streamOutput.writeDouble(centroid.mean()); + streamOutput.writeVInt(centroid.count()); + } + return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); + } + + public void testNoMatchingField() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(getDocValue("wrong_number", new int[]{7, 1}))); + }, hdr -> { + //assertEquals(0L, hdr.state.getTotalCount()); + assertFalse(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + public void testSomeMatchesBinaryDocValues() throws IOException { + testCase(new DocValuesFieldExistsQuery("number"), iw -> { + iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); + }, hdr -> { + //assertEquals(4L, hdr.state.getTotalCount()); + double approximation = 0.05d; + assertEquals(15.0d, hdr.percentile(25), approximation); + assertEquals(30.0d, hdr.percentile(50), approximation); + assertEquals(50.0d, hdr.percentile(75), approximation); + assertEquals(60.0d, hdr.percentile(99), approximation); + assertTrue(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + public void testSomeMatchesMultiBinaryDocValues() throws IOException { + testCase(new DocValuesFieldExistsQuery("number"), iw -> { + iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); + }, hdr -> { + //assertEquals(16L, hdr.state.getTotalCount()); + double approximation = 0.05d; + assertEquals(15.0d, hdr.percentile(25), approximation); + assertEquals(30.0d, hdr.percentile(50), approximation); + assertEquals(50.0d, hdr.percentile(75), approximation); + assertEquals(60.0d, hdr.percentile(99), approximation); + assertTrue(AggregationInspectionHelper.hasValue(hdr)); + }); + } + + private void 
testCase(Query query, CheckedConsumer buildIndex, + Consumer verify) throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + buildIndex.accept(indexWriter); + } + + try (IndexReader indexReader = DirectoryReader.open(directory)) { + IndexSearcher indexSearcher = newSearcher(indexReader, true, true); + + PercentilesAggregationBuilder builder = + new PercentilesAggregationBuilder("test").field("number").method(PercentilesMethod.TDIGEST); + + MappedFieldType fieldType = new HistogramFieldMapper.Builder("number").fieldType(); + fieldType.setName("number"); + Aggregator aggregator = createAggregator(builder, indexSearcher, fieldType); + aggregator.preCollection(); + indexSearcher.search(query, aggregator); + aggregator.postCollection(); + verify.accept((InternalTDigestPercentiles) aggregator.buildAggregation(0L)); + + } + } + } +} From 550394c184045166ca621c6529bcbc34808dfb42 Mon Sep 17 00:00:00 2001 From: iverase Date: Mon, 28 Oct 2019 13:49:31 +0100 Subject: [PATCH 02/24] checkStyle --- .../AbstractHDRPercentilesAggregator.java | 6 ++++-- .../AbstractTDigestPercentilesAggregator.java | 6 ++++-- .../metrics/HDRPercentileRanksAggregator.java | 1 - .../HDRPercentileRanksAggregatorFactory.java | 1 - .../PercentileRanksAggregationBuilder.java | 7 ++++--- .../TDigestPercentileRanksAggregator.java | 1 - ...igestPercentileRanksAggregatorFactory.java | 1 - .../mapper/HistogramFieldMapper.java | 4 ---- .../mapper/HistogramAggregationTests.java | 20 ++++++++----------- 9 files changed, 20 insertions(+), 27 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java index e5e7bd8d2b9e2..cdc67960900af 100644 --- 
a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java @@ -88,7 +88,8 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, } - private LeafBucketCollector collectNumeric(final SortedNumericDoubleValues values, final BigArrays bigArrays, final LeafBucketCollector sub) { + private LeafBucketCollector collectNumeric(final SortedNumericDoubleValues values, + final BigArrays bigArrays, final LeafBucketCollector sub) { return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { @@ -117,7 +118,8 @@ public void collect(int doc, long bucket) throws IOException { }; } - private LeafBucketCollector collectHistogramValues(final HistogramValues values, final BigArrays bigArrays, final LeafBucketCollector sub) { + private LeafBucketCollector collectHistogramValues(final HistogramValues values, + final BigArrays bigArrays, final LeafBucketCollector sub) { return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java index b7d90bfbe32a2..fad1a4281b144 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java @@ -87,7 +87,8 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, } - private LeafBucketCollector collectNumeric(final SortedNumericDoubleValues values, final BigArrays bigArrays, final LeafBucketCollector sub) { + private LeafBucketCollector 
collectNumeric(final SortedNumericDoubleValues values, + final BigArrays bigArrays, final LeafBucketCollector sub) { return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { @@ -109,7 +110,8 @@ public void collect(int doc, long bucket) throws IOException { }; } - private LeafBucketCollector collectHistogramValues(final HistogramValues values, final BigArrays bigArrays, final LeafBucketCollector sub) { + private LeafBucketCollector collectHistogramValues(final HistogramValues values, + final BigArrays bigArrays, final LeafBucketCollector sub) { return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java index 3a51ef54a0289..7da8c912b5e91 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java @@ -24,7 +24,6 @@ import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java index 81919404dd665..3e3717b146fad 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java +++ 
b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregatorFactory.java @@ -25,7 +25,6 @@ import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java index 91c57dc6949f0..3c52adac89b79 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentileRanksAggregationBuilder.java @@ -32,7 +32,6 @@ import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.support.ValueType; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder.LeafOnly; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; @@ -240,8 +239,10 @@ public PercentilesMethod method() { } @Override - protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, ValuesSourceConfig config, - AggregatorFactory parent, Builder subFactoriesBuilder) throws IOException { + protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext 
queryShardContext, + ValuesSourceConfig config, + AggregatorFactory parent, + Builder subFactoriesBuilder) throws IOException { switch (method) { case TDIGEST: return new TDigestPercentileRanksAggregatorFactory(name, config, values, compression, keyed, queryShardContext, parent, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java index f902360971c0b..831b302f313e3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregator.java @@ -23,7 +23,6 @@ import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java index 6fe529cee3112..5a6142263736d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentileRanksAggregatorFactory.java @@ -25,7 +25,6 @@ import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import 
org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index 0aa1cb82a5bf5..9441946f9d0ab 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -214,10 +214,6 @@ public MappedFieldType clone() { public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { failIfNoDocValues(); return new IndexFieldData.Builder() { - @Override - public int hashCode() { - return super.hashCode(); - } @Override public IndexFieldData build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache, CircuitBreakerService breakerService, MapperService mapperService) { diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java index e63b17abd8831..9e14234b5b3f8 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java @@ -89,20 +89,18 @@ public void testHDRHistogram() throws Exception { client().bulk(bulkRequest); bulkRequest = new BulkRequest(); List values = new ArrayList<>(); - List counts = new ArrayList<>(); + List counts = new ArrayList<>(); Iterator iterator = histogram.recordedValues().iterator(); while (iterator.hasNext()) { 
DoubleHistogramIterationValue histValue = iterator.next(); - double d = histValue.getValueIteratedTo(); - values.add(d); - long count = histValue.getCountAtValueIteratedTo(); - counts.add(count); + values.add(histValue.getValueIteratedTo()); + counts.add(Math.toIntExact(histValue.getCountAtValueIteratedTo())); } XContentBuilder preAggDoc = XContentFactory.jsonBuilder() .startObject() .startObject("data") .field("values", values.toArray(new Double[values.size()])) - .field("counts", counts.toArray(new Long[counts.size()])) + .field("counts", counts.toArray(new Integer[counts.size()])) .endObject() .endObject(); client().prepareIndex("pre_agg").setSource(preAggDoc).get(); @@ -184,19 +182,17 @@ public void testTDigestHistogram() throws Exception { client().bulk(bulkRequest); bulkRequest = new BulkRequest(); List values = new ArrayList<>(); - List counts = new ArrayList<>(); + List counts = new ArrayList<>(); Collection centroids = histogram.centroids(); for (Centroid centroid : centroids) { - double d =centroid.mean(); - values.add(d); - long count = centroid.count(); - counts.add(count); + values.add(centroid.mean()); + counts.add(centroid.count()); } XContentBuilder preAggDoc = XContentFactory.jsonBuilder() .startObject() .startObject("data") .field("values", values.toArray(new Double[values.size()])) - .field("counts", counts.toArray(new Long[counts.size()])) + .field("counts", counts.toArray(new Integer[counts.size()])) .endObject() .endObject(); client().prepareIndex("pre_agg").setSource(preAggDoc).get(); From 9d4f9c4e563862ab700f279133423a688d888b39 Mon Sep 17 00:00:00 2001 From: iverase Date: Mon, 28 Oct 2019 14:07:13 +0100 Subject: [PATCH 03/24] more checkStyle --- .../mapper/HistogramFieldMapper.java | 21 ++++++++++++------- ...regatedPercentileRanksAggregatorTests.java | 3 ++- ...eAggregatedPercentilesAggregatorTests.java | 3 ++- .../mapper/HistogramAggregationTests.java | 6 ++++-- ...eAggregatedPercentilesAggregatorTests.java | 2 +- 5 files changed, 23 
insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index 9441946f9d0ab..2b1e8c25b89d2 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -148,7 +148,8 @@ public HistogramFieldMapper build(BuilderContext context) { public static class TypeParser implements Mapper.TypeParser { @Override - public Mapper.Builder parse(String name, Map node, ParserContext parserContext) + public Mapper.Builder parse(String name, + Map node, ParserContext parserContext) throws MapperParsingException { Builder builder = new HistogramFieldMapper.Builder(name); parseField(builder, name, node, parserContext); @@ -216,7 +217,8 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) { return new IndexFieldData.Builder() { @Override - public IndexFieldData build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache, CircuitBreakerService breakerService, MapperService mapperService) { + public IndexFieldData build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache, + CircuitBreakerService breakerService, MapperService mapperService) { return new IndexHistogramFieldData(indexSettings.getIndex(), fieldType.name()) { @@ -281,14 +283,16 @@ public AtomicHistogramFieldData loadDirect(LeafReaderContext context) throws Exc } @Override - public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) { + public SortField sortField(Object missingValue, MultiValueMode sortMode, + XFieldComparatorSource.Nested nested, boolean reverse) { return null; } }; } private HistogramValue 
getHistogramValue(final BytesRef bytesRef) throws IOException { - final ByteBufferStreamInput streamInput = new ByteBufferStreamInput(ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length)); + final ByteBufferStreamInput streamInput = new ByteBufferStreamInput( + ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length)); final int numValues = streamInput.readVInt(); return new HistogramValue() { double value; @@ -334,13 +338,15 @@ public Query existsQuery(QueryShardContext context) { if (hasDocValues()) { return new DocValuesFieldExistsQuery(name()); } else { - throw new QueryShardException(context, "field " + name() + " of type [" + CONTENT_TYPE + "] has no doc values and cannot be searched"); + throw new QueryShardException(context, "field " + name() + " of type [" + CONTENT_TYPE + "] " + + "has no doc values and cannot be searched"); } } @Override public Query termQuery(Object value, QueryShardContext context) { - throw new QueryShardException(context, "[" + CONTENT_TYPE + "] field do not support searching, use dedicated aggregations instead: [" + throw new QueryShardException(context, "[" + CONTENT_TYPE + "] field do not support searching, " + + "use dedicated aggregations instead: [" + name() + "]"); } } @@ -436,7 +442,8 @@ public void parse(ParseContext context) throws IOException { if (ignoreMalformed.value()) { return; } - throw new MapperParsingException("failed to parse field [{}] of type [{}]", ex, fieldType().name(), fieldType().typeName()); + throw new MapperParsingException("failed to parse field [{}] of type [{}]", + ex, fieldType().name(), fieldType().typeName()); } context.path().remove(); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java index fe1bd57d1a1f5..75e3493e834de 100644 --- 
a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java @@ -41,7 +41,8 @@ private BinaryDocValuesField getDocValue(String fieldName, double[] values) thro } BytesStreamOutput streamOutput = new BytesStreamOutput(); DoubleHistogram.RecordedValues recordedValues = histogram.recordedValues(); - List histogramValues = StreamSupport.stream(recordedValues.spliterator(), false).collect(Collectors.toList()); + List histogramValues = StreamSupport.stream(recordedValues.spliterator(), false) + .collect(Collectors.toList()); streamOutput.writeVInt(histogramValues.size()); Iterator iterator = recordedValues.iterator(); while (iterator.hasNext()) { diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java index b5988dd973ff8..98bb5fcc28bda 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java @@ -44,7 +44,8 @@ private BinaryDocValuesField getDocValue(String fieldName, int[] values) throws } BytesStreamOutput streamOutput = new BytesStreamOutput(); DoubleHistogram.RecordedValues recordedValues = histogram.recordedValues(); - List histogramValues = StreamSupport.stream(recordedValues.spliterator(), false).collect(Collectors.toList()); + List histogramValues = StreamSupport.stream(recordedValues.spliterator(), false) + .collect(Collectors.toList()); streamOutput.writeVInt(histogramValues.size()); Iterator iterator = recordedValues.iterator(); while 
(iterator.hasNext()) { diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java index 9e14234b5b3f8..af1d7266e9f0c 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java @@ -116,7 +116,8 @@ public void testHDRHistogram() throws Exception { assertEquals(numDocs / frq, response.getHits().getTotalHits().value); PercentilesAggregationBuilder builder = - AggregationBuilders.percentiles("agg").field("data").method(PercentilesMethod.HDR).numberOfSignificantValueDigits(numberOfSignificantValueDigits).percentiles(10); + AggregationBuilders.percentiles("agg").field("data").method(PercentilesMethod.HDR) + .numberOfSignificantValueDigits(numberOfSignificantValueDigits).percentiles(10); SearchResponse responseRaw = client().prepareSearch("raw").addAggregation(builder).get(); SearchResponse responsePreAgg = client().prepareSearch("pre_agg").addAggregation(builder).get(); @@ -208,7 +209,8 @@ public void testTDigestHistogram() throws Exception { assertEquals(numDocs / frq, response.getHits().getTotalHits().value); PercentilesAggregationBuilder builder = - AggregationBuilders.percentiles("agg").field("data").method(PercentilesMethod.TDIGEST).compression(compression).percentiles(10, 25, 500, 75); + AggregationBuilders.percentiles("agg").field("data").method(PercentilesMethod.TDIGEST) + .compression(compression).percentiles(10, 25, 500, 75); SearchResponse responseRaw = client().prepareSearch("raw").addAggregation(builder).get(); SearchResponse responsePreAgg = client().prepareSearch("pre_agg").addAggregation(builder).get(); diff --git 
a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java index a2f1b653f82c5..fab959d861b6c 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java @@ -30,7 +30,7 @@ import java.io.IOException; import java.util.Collection; import java.util.Iterator; -import java.util.function.Consumer;; +import java.util.function.Consumer; import static java.util.Collections.singleton; From 4e3eed76f31a6d2f1cd63856f7a0cb226c41b136 Mon Sep 17 00:00:00 2001 From: iverase Date: Tue, 29 Oct 2019 08:08:31 +0100 Subject: [PATCH 04/24] Addressed part of the review --- .../metrics/HDRPercentilesAggregator.java | 4 +- .../HDRPercentilesAggregatorFactory.java | 38 +++++----- .../PercentilesAggregationBuilder.java | 32 ++++---- .../TDigestPercentilesAggregatorFactory.java | 20 ++--- .../mapper/HistogramFieldMapper.java | 73 +++++++++---------- .../mapper/HistogramFieldMapperTests.java | 52 ++++++++++++- 6 files changed, 129 insertions(+), 90 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java index b3720f63d01d1..ae45dcf50d4d1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregator.java @@ -33,8 +33,8 @@ class HDRPercentilesAggregator extends AbstractHDRPercentilesAggregator { HDRPercentilesAggregator(String name, ValuesSource valuesSource, 
SearchContext context, Aggregator parent, double[] percents, - int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, - List pipelineAggregators, Map metaData) throws IOException { + int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, + List pipelineAggregators, Map metaData) throws IOException { super(name, valuesSource, context, parent, percents, numberOfSignificantValueDigits, keyed, formatter, pipelineAggregators, metaData); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java index 6f69afcb552c2..4df4e19aa5df9 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentilesAggregatorFactory.java @@ -40,14 +40,14 @@ class HDRPercentilesAggregatorFactory extends ValuesSourceAggregatorFactory config, - double[] percents, - int numberOfSignificantValueDigits, - boolean keyed, - QueryShardContext queryShardContext, - AggregatorFactory parent, - AggregatorFactories.Builder subFactoriesBuilder, - Map metaData) throws IOException { + ValuesSourceConfig config, + double[] percents, + int numberOfSignificantValueDigits, + boolean keyed, + QueryShardContext queryShardContext, + AggregatorFactory parent, + AggregatorFactories.Builder subFactoriesBuilder, + Map metaData) throws IOException { super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData); this.percents = percents; this.numberOfSignificantValueDigits = numberOfSignificantValueDigits; @@ -56,23 +56,23 @@ class HDRPercentilesAggregatorFactory extends ValuesSourceAggregatorFactory pipelineAggregators, - Map metaData) - throws IOException { + Aggregator parent, + List pipelineAggregators, + Map metaData) + throws IOException { return new 
HDRPercentilesAggregator(name, null, searchContext, parent, percents, numberOfSignificantValueDigits, keyed, - config.format(), pipelineAggregators, metaData); + config.format(), pipelineAggregators, metaData); } @Override protected Aggregator doCreateInternal(ValuesSource valuesSource, - SearchContext searchContext, - Aggregator parent, - boolean collectsFromSingleBucket, - List pipelineAggregators, - Map metaData) throws IOException { + SearchContext searchContext, + Aggregator parent, + boolean collectsFromSingleBucket, + List pipelineAggregators, + Map metaData) throws IOException { return new HDRPercentilesAggregator(name, valuesSource, searchContext, parent, percents, numberOfSignificantValueDigits, keyed, - config.format(), pipelineAggregators, metaData); + config.format(), pipelineAggregators, metaData); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java index a2271fd700ae1..429dc45ca553c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/PercentilesAggregationBuilder.java @@ -58,7 +58,7 @@ private static class TDigestOptions { } private static final ObjectParser TDIGEST_OPTIONS_PARSER = - new ObjectParser<>(PercentilesMethod.TDIGEST.getParseField().getPreferredName(), TDigestOptions::new); + new ObjectParser<>(PercentilesMethod.TDIGEST.getParseField().getPreferredName(), TDigestOptions::new); static { TDIGEST_OPTIONS_PARSER.declareDouble((opts, compression) -> opts.compression = compression, COMPRESSION_FIELD); } @@ -68,11 +68,11 @@ private static class HDROptions { } private static final ObjectParser HDR_OPTIONS_PARSER = - new ObjectParser<>(PercentilesMethod.HDR.getParseField().getPreferredName(), HDROptions::new); + new 
ObjectParser<>(PercentilesMethod.HDR.getParseField().getPreferredName(), HDROptions::new); static { HDR_OPTIONS_PARSER.declareInt( - (opts, numberOfSigDigits) -> opts.numberOfSigDigits = numberOfSigDigits, - NUMBER_SIGNIFICANT_DIGITS_FIELD); + (opts, numberOfSigDigits) -> opts.numberOfSigDigits = numberOfSigDigits, + NUMBER_SIGNIFICANT_DIGITS_FIELD); } private static final ObjectParser PARSER; @@ -81,8 +81,8 @@ private static class HDROptions { ValuesSourceParserHelper.declareAnyFields(PARSER, true, true); PARSER.declareDoubleArray( - (b, v) -> b.percentiles(v.stream().mapToDouble(Double::doubleValue).toArray()), - PERCENTS_FIELD); + (b, v) -> b.percentiles(v.stream().mapToDouble(Double::doubleValue).toArray()), + PERCENTS_FIELD); PARSER.declareBoolean(PercentilesAggregationBuilder::keyed, KEYED_FIELD); @@ -263,18 +263,18 @@ public PercentilesMethod method() { @Override protected ValuesSourceAggregatorFactory innerBuild(QueryShardContext queryShardContext, - ValuesSourceConfig config, - AggregatorFactory parent, - Builder subFactoriesBuilder) throws IOException { + ValuesSourceConfig config, + AggregatorFactory parent, + Builder subFactoriesBuilder) throws IOException { switch (method) { - case TDIGEST: - return new TDigestPercentilesAggregatorFactory(name, config, percents, compression, keyed, queryShardContext, parent, + case TDIGEST: + return new TDigestPercentilesAggregatorFactory(name, config, percents, compression, keyed, queryShardContext, parent, subFactoriesBuilder, metaData); - case HDR: - return new HDRPercentilesAggregatorFactory(name, config, percents, - numberOfSignificantValueDigits, keyed, queryShardContext, parent, subFactoriesBuilder, metaData); - default: - throw new IllegalStateException("Illegal method [" + method + "]"); + case HDR: + return new HDRPercentilesAggregatorFactory(name, config, percents, + numberOfSignificantValueDigits, keyed, queryShardContext, parent, subFactoriesBuilder, metaData); + default: + throw new 
IllegalStateException("Illegal method [" + method + "]"); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java index c9ba61331112c..012dde2a92f70 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestPercentilesAggregatorFactory.java @@ -51,22 +51,22 @@ class TDigestPercentilesAggregatorFactory @Override protected Aggregator createUnmapped(SearchContext searchContext, - Aggregator parent, - List pipelineAggregators, - Map metaData) throws IOException { + Aggregator parent, + List pipelineAggregators, + Map metaData) throws IOException { return new TDigestPercentilesAggregator(name, null, searchContext, parent, percents, compression, keyed, config.format(), - pipelineAggregators, metaData); + pipelineAggregators, metaData); } @Override protected Aggregator doCreateInternal(ValuesSource valuesSource, - SearchContext searchContext, - Aggregator parent, - boolean collectsFromSingleBucket, - List pipelineAggregators, - Map metaData) throws IOException { + SearchContext searchContext, + Aggregator parent, + boolean collectsFromSingleBucket, + List pipelineAggregators, + Map metaData) throws IOException { return new TDigestPercentilesAggregator(name, valuesSource, searchContext, parent, percents, compression, keyed, config.format(), - pipelineAggregators, metaData); + pipelineAggregators, metaData); } } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index 2b1e8c25b89d2..504a0a4116229 100644 --- 
a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -6,6 +6,8 @@ package org.elasticsearch.xpack.analytics.mapper; +import com.carrotsearch.hppc.DoubleArrayList; +import com.carrotsearch.hppc.IntArrayList; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.index.BinaryDocValues; @@ -27,7 +29,6 @@ import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.fielddata.AtomicHistogramFieldData; -import org.elasticsearch.index.fielddata.FieldData; import org.elasticsearch.index.fielddata.HistogramValue; import org.elasticsearch.index.fielddata.HistogramValues; import org.elasticsearch.index.fielddata.IndexFieldData; @@ -35,7 +36,6 @@ import org.elasticsearch.index.fielddata.IndexHistogramFieldData; import org.elasticsearch.index.fielddata.ScriptDocValues; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; -import org.elasticsearch.index.mapper.ArrayValueMapperParser; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.Mapper; @@ -49,18 +49,18 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; +import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.elasticsearch.index.mapper.TypeParsers.parseField; /** * Field Mapper for pre-aggregated histograms. 
* */ -public class HistogramFieldMapper extends FieldMapper implements ArrayValueMapperParser { +public class HistogramFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "histogram"; public static class Names { @@ -252,17 +252,14 @@ public HistogramValue histogram() { @Override public ScriptDocValues getScriptValues() { - return new ScriptDocValues.Strings(getBytesValues()); + throw new UnsupportedOperationException("The [" + CONTENT_TYPE + "] field does not " + + "support scripts"); } @Override public SortedBinaryDocValues getBytesValues() { - try { - final BinaryDocValues values = DocValues.getBinary(context.reader(), fieldName); - return FieldData.singleton(values); - } catch (IOException e) { - throw new IllegalStateException("Cannot load doc values", e); - } + throw new UnsupportedOperationException("String representation of doc values " + + "for [" + CONTENT_TYPE + "] fields is not supported"); } @Override @@ -285,7 +282,7 @@ public AtomicHistogramFieldData loadDirect(LeafReaderContext context) throws Exc @Override public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) { - return null; + throw new UnsupportedOperationException("can't sort on the [" + CONTENT_TYPE + "] field"); } }; } @@ -352,49 +349,43 @@ public Query termQuery(Object value, QueryShardContext context) { } @Override - public void parse(ParseContext context) throws IOException { + public void parse(ParseContext context) { + if (context.externalValueSet()) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] can't be used in multi-fields"); + } context.path().add(simpleName()); try { - List values = null; - List counts = null; + DoubleArrayList values = null; + IntArrayList counts = null; XContentParser.Token token = context.parser().currentToken(); - if (token != XContentParser.Token.START_OBJECT) { - throw new MapperParsingException("error parsing field [" - 
+ name() + "], expected an [" + XContentParser.Token.START_OBJECT.name() + - "] but got [" + token.name() + "]"); - } + // should be an object + ensureExpectedToken(XContentParser.Token.START_OBJECT, token, context.parser()::getTokenLocation); token = context.parser().nextToken(); while (token != XContentParser.Token.END_OBJECT) { - if (token != XContentParser.Token.FIELD_NAME) { - throw new MapperParsingException("error parsing field [" - + name() + "], expected a field but got " + context.parser().currentName()); - } + // should be an field + ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, context.parser()::getTokenLocation); String fieldName = context.parser().currentName(); if (fieldName.equals(VALUES_FIELD.getPreferredName())) { token = context.parser().nextToken(); - //should be an array - if (token != XContentParser.Token.START_ARRAY) { - throw new MapperParsingException("error parsing field [" - + name() + "], expected an [" + XContentParser.Token.START_ARRAY.name() + - "] but got [" + token.name() + "]"); - } - values = new ArrayList<>(); + // should be an array + ensureExpectedToken(XContentParser.Token.START_ARRAY, token, context.parser()::getTokenLocation); + values = new DoubleArrayList(); token = context.parser().nextToken(); while (token != XContentParser.Token.END_ARRAY) { + // should be a number + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, context.parser()::getTokenLocation); values.add(context.parser().doubleValue()); token = context.parser().nextToken(); } } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) { token = context.parser().nextToken(); - //should be an array - if (token != XContentParser.Token.START_ARRAY) { - throw new MapperParsingException("error parsing field [" - + name() + "], expected an [" + XContentParser.Token.START_ARRAY.name() + - "] but got [" + token.name() + "]"); - } - counts = new ArrayList<>(); + // should be an array + ensureExpectedToken(XContentParser.Token.START_ARRAY, 
token, context.parser()::getTokenLocation); + counts = new IntArrayList(); token = context.parser().nextToken(); while (token != XContentParser.Token.END_ARRAY) { + // should be a number + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, context.parser()::getTokenLocation); counts.add(context.parser().intValue()); token = context.parser().nextToken(); } @@ -435,7 +426,11 @@ public void parse(ParseContext context) throws IOException { Field field = new BinaryDocValuesField(simpleName(), streamOutput.bytes().toBytesRef()); streamOutput.close(); - context.doc().add(field); + if (context.doc().getByKey(fieldType().name()) != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + + "] doesn't not support indexing multiple values for the same field in the same document"); + } + context.doc().addWithKey(fieldType().name(), field); } } catch (Exception ex) { diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java index 20cc639d32d1f..cf9af5dafb7c3 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -75,7 +75,8 @@ public void testParseArrayValue() throws Exception { XContentType.JSON); Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); - assertThat(e.getCause().getMessage(), containsString("expected an [START_OBJECT] but got [START_ARRAY]")); + assertThat(e.getCause().getMessage(), containsString("doesn't not support indexing multiple values " + + "for the same field in the same document")); } public void testEmptyArrays() throws Exception { @@ -200,7 +201,49 @@ public void testFieldCountsNotArray() throws Exception { 
XContentType.JSON); Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); - assertThat(e.getCause().getMessage(), containsString("expected an [START_ARRAY] but got [VALUE_STRING]")); + assertThat(e.getCause().getMessage(), containsString("expecting token of type [START_ARRAY] but found [VALUE_STRING]")); + } + + public void testFieldCountsStringArray() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new String[] {"4", "5", "6"}) + .field("values", new double[] {2, 2, 3}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("expecting token of type [VALUE_NUMBER] but found [VALUE_STRING]")); + } + + public void testFieldValuesStringArray() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + 
BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new int[] {4, 5, 6}) + .field("values", new String[] {"2", "2", "3"}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString("expecting token of type [VALUE_NUMBER] but found [VALUE_STRING]")); } public void testFieldValuesNotArray() throws Exception { @@ -221,7 +264,7 @@ public void testFieldValuesNotArray() throws Exception { XContentType.JSON); Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); - assertThat(e.getCause().getMessage(), containsString("expected an [START_ARRAY] but got [VALUE_STRING]")); + assertThat(e.getCause().getMessage(), containsString("expecting token of type [START_ARRAY] but found [VALUE_STRING]")); } public void testCountIsLong() throws Exception { @@ -260,7 +303,8 @@ public void testFieldNotObject() throws Exception { XContentType.JSON); Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); - assertThat(e.getCause().getMessage(), containsString("expected an [START_OBJECT] but got [VALUE_STRING]")); + assertThat(e.getCause().getMessage(), containsString("expecting token of type [START_OBJECT] " + + "but found [VALUE_STRING]")); } public void testNegativeCount() throws Exception { From a168d329c4105740a3873ff7818d6ca449194f2a Mon Sep 17 00:00:00 2001 From: iverase Date: Tue, 29 Oct 2019 08:21:26 +0100 Subject: [PATCH 05/24] Extract the logic of creating a new histogram to a separate method --- .../AbstractHDRPercentilesAggregator.java | 48 ++++++++----------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java 
b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java index cdc67960900af..f80904443663c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java @@ -93,21 +93,7 @@ private LeafBucketCollector collectNumeric(final SortedNumericDoubleValues value return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { - states = bigArrays.grow(states, bucket + 1); - - DoubleHistogram state = states.get(bucket); - if (state == null) { - state = new DoubleHistogram(numberOfSignificantValueDigits); - // Set the histogram to autosize so it can resize itself as - // the data range increases. Resize operations should be - // rare as the histogram buckets are exponential (on the top - // level). In the future we could expose the range as an - // option on the request so the histogram can be fixed at - // initialisation and doesn't need resizing. - state.setAutoResize(true); - states.set(bucket, state); - } - + DoubleHistogram state = getExistingOrNewHistogram(bigArrays, bucket); if (values.advanceExact(doc)) { final int valueCount = values.docValueCount(); for (int i = 0; i < valueCount; i++) { @@ -123,20 +109,7 @@ private LeafBucketCollector collectHistogramValues(final HistogramValues values, return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { - states = bigArrays.grow(states, bucket + 1); - DoubleHistogram state = states.get(bucket); - if (state == null) { - state = new DoubleHistogram(numberOfSignificantValueDigits); - // Set the histogram to autosize so it can resize itself as - // the data range increases. Resize operations should be - // rare as the histogram buckets are exponential (on the top - // level). 
In the future we could expose the range as an - // option on the request so the histogram can be fixed at - // initialisation and doesn't need resizing. - state.setAutoResize(true); - states.set(bucket, state); - } - + DoubleHistogram state = getExistingOrNewHistogram(bigArrays, bucket); if (values.advanceExact(doc)) { final HistogramValue sketch = values.histogram(); while(sketch.next()) { @@ -147,6 +120,23 @@ public void collect(int doc, long bucket) throws IOException { }; } + private DoubleHistogram getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) { + states = bigArrays.grow(states, bucket + 1); + DoubleHistogram state = states.get(bucket); + if (state == null) { + state = new DoubleHistogram(numberOfSignificantValueDigits); + // Set the histogram to autosize so it can resize itself as + // the data range increases. Resize operations should be + // rare as the histogram buckets are exponential (on the top + // level). In the future we could expose the range as an + // option on the request so the histogram can be fixed at + // initialisation and doesn't need resizing. + state.setAutoResize(true); + states.set(bucket, state); + } + return state; + } + @Override public boolean hasMetric(String name) { From 038d42942d32f03dd7dd16af437bfe52ccca6df9 Mon Sep 17 00:00:00 2001 From: iverase Date: Tue, 29 Oct 2019 08:39:12 +0100 Subject: [PATCH 06/24] Addressed more comments. 
--- .../index/fielddata/AtomicHistogramFieldData.java | 4 +++- .../index/fielddata/HistogramValues.java | 2 +- .../search/aggregations/support/ValuesSource.java | 7 +++---- .../analytics/mapper/HistogramFieldMapper.java | 14 +++++--------- ...reAggregatedPercentileRanksAggregatorTests.java | 3 --- ...HDRPreAggregatedPercentilesAggregatorTests.java | 3 --- ...reAggregatedPercentileRanksAggregatorTests.java | 1 - ...estPreAggregatedPercentilesAggregatorTests.java | 1 - 8 files changed, 12 insertions(+), 23 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java index 8eeaacf44061e..1678ca0df7783 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/AtomicHistogramFieldData.java @@ -19,6 +19,8 @@ package org.elasticsearch.index.fielddata; +import java.io.IOException; + /** * {@link AtomicFieldData} specialization for histogram data. */ @@ -27,6 +29,6 @@ public interface AtomicHistogramFieldData extends AtomicFieldData { /** * Return Histogram values. */ - HistogramValues getHistogramValues(); + HistogramValues getHistogramValues() throws IOException; } diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java index 8e0e0cbc9181a..f2a0a9c9092d6 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/HistogramValues.java @@ -36,6 +36,6 @@ public abstract class HistogramValues { * Get the {@link HistogramValue} associated with the current document. * The returned {@link HistogramValue} might be reused across calls. 
*/ - public abstract HistogramValue histogram(); + public abstract HistogramValue histogram() throws IOException; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java index 3d92501a9071c..a2f5da07e3ce1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java @@ -553,11 +553,10 @@ public org.elasticsearch.index.fielddata.MultiGeoPointValues geoPointValues(Leaf } } } - - + public abstract static class Histogram extends ValuesSource { - public abstract HistogramValues getHistogramValues(LeafReaderContext context); + public abstract HistogramValues getHistogramValues(LeafReaderContext context) throws IOException; public static class Fielddata extends Histogram { @@ -583,7 +582,7 @@ public boolean advanceExact(int doc) throws IOException { }; } - public HistogramValues getHistogramValues(LeafReaderContext context) { + public HistogramValues getHistogramValues(LeafReaderContext context) throws IOException { return indexFieldData.load(context).getHistogramValues(); } } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index 504a0a4116229..763792deacbd6 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -226,7 +226,7 @@ public IndexFieldData build(IndexSettings indexSettings, MappedFieldType fiel public AtomicHistogramFieldData load(LeafReaderContext context) { return new AtomicHistogramFieldData() { @Override - public HistogramValues getHistogramValues() { + 
public HistogramValues getHistogramValues() throws IOException { try { final BinaryDocValues values = DocValues.getBinary(context.reader(), fieldName); return new HistogramValues() { @@ -236,16 +236,16 @@ public boolean advanceExact(int doc) throws IOException { } @Override - public HistogramValue histogram() { + public HistogramValue histogram() throws IOException { try { return getHistogramValue(values.binaryValue()); } catch (IOException e) { - throw new IllegalStateException("Cannot load doc value", e); + throw new IOException("Cannot load doc value", e); } } }; } catch (IOException e) { - throw new IllegalStateException("Cannot load doc values", e); + throw new IOException("Cannot load doc values", e); } } @@ -290,17 +290,14 @@ public SortField sortField(Object missingValue, MultiValueMode sortMode, private HistogramValue getHistogramValue(final BytesRef bytesRef) throws IOException { final ByteBufferStreamInput streamInput = new ByteBufferStreamInput( ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length)); - final int numValues = streamInput.readVInt(); return new HistogramValue() { double value; int count; - int position; boolean isExhausted; @Override public boolean next() throws IOException { - if (position < numValues) { - position++; + if (streamInput.available() > 0) { value = streamInput.readDouble(); count = streamInput.readVInt(); return true; @@ -414,7 +411,6 @@ public void parse(ParseContext context) { } if (fieldType().hasDocValues()) { BytesStreamOutput streamOutput = new BytesStreamOutput(); - streamOutput.writeVInt(values.size()); for (int i = 0; i < values.size(); i++) { streamOutput.writeDouble(values.get(i)); if (counts.get(i) < 0) { diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java index 
75e3493e834de..3e5f8e8986e1f 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java @@ -41,9 +41,6 @@ private BinaryDocValuesField getDocValue(String fieldName, double[] values) thro } BytesStreamOutput streamOutput = new BytesStreamOutput(); DoubleHistogram.RecordedValues recordedValues = histogram.recordedValues(); - List histogramValues = StreamSupport.stream(recordedValues.spliterator(), false) - .collect(Collectors.toList()); - streamOutput.writeVInt(histogramValues.size()); Iterator iterator = recordedValues.iterator(); while (iterator.hasNext()) { DoubleHistogramIterationValue value = iterator.next(); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java index 98bb5fcc28bda..2ce66ff358125 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java @@ -44,9 +44,6 @@ private BinaryDocValuesField getDocValue(String fieldName, int[] values) throws } BytesStreamOutput streamOutput = new BytesStreamOutput(); DoubleHistogram.RecordedValues recordedValues = histogram.recordedValues(); - List histogramValues = StreamSupport.stream(recordedValues.spliterator(), false) - .collect(Collectors.toList()); - streamOutput.writeVInt(histogramValues.size()); Iterator iterator = recordedValues.iterator(); while (iterator.hasNext()) { DoubleHistogramIterationValue value = iterator.next(); diff --git 
a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java index e3e1aed509058..df6183be50a41 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java @@ -41,7 +41,6 @@ private BinaryDocValuesField getDocValue(String fieldName, double[] values) thro BytesStreamOutput streamOutput = new BytesStreamOutput(); histogram.compress(); Collection centroids = histogram.centroids(); - streamOutput.writeVInt(centroids.size()); Iterator iterator = centroids.iterator(); while ( iterator.hasNext()) { Centroid centroid = iterator.next(); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java index fab959d861b6c..47be63147791e 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java @@ -44,7 +44,6 @@ private BinaryDocValuesField getDocValue(String fieldName, int[] values) throws BytesStreamOutput streamOutput = new BytesStreamOutput(); histogram.compress(); Collection centroids = histogram.centroids(); - streamOutput.writeVInt(centroids.size()); Iterator iterator = centroids.iterator(); while ( iterator.hasNext()) { Centroid centroid = iterator.next(); From edc2faf46f3eac4ede54c7dfce17633b1b649c4e Mon Sep 
17 00:00:00 2001 From: iverase Date: Tue, 29 Oct 2019 08:42:14 +0100 Subject: [PATCH 07/24] formatting --- .../aggregations/metrics/HDRPercentileRanksAggregator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java index 7da8c912b5e91..308d40c94cf8b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/HDRPercentileRanksAggregator.java @@ -33,8 +33,8 @@ class HDRPercentileRanksAggregator extends AbstractHDRPercentilesAggregator { HDRPercentileRanksAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, - double[] percents, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat format, - List pipelineAggregators, Map metaData) throws IOException { + double[] percents, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat format, + List pipelineAggregators, Map metaData) throws IOException { super(name, valuesSource, context, parent, percents, numberOfSignificantValueDigits, keyed, format, pipelineAggregators, metaData); } From c527aec60d93c0db2c86a5da91a0cfe32b5f55b8 Mon Sep 17 00:00:00 2001 From: iverase Date: Tue, 29 Oct 2019 08:47:04 +0100 Subject: [PATCH 08/24] extract logic for getting histogram in TDigest --- .../AbstractTDigestPercentilesAggregator.java | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java index fad1a4281b144..1b78db480068e 100644 --- 
a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java @@ -92,14 +92,7 @@ private LeafBucketCollector collectNumeric(final SortedNumericDoubleValues value return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { - states = bigArrays.grow(states, bucket + 1); - - TDigestState state = states.get(bucket); - if (state == null) { - state = new TDigestState(compression); - states.set(bucket, state); - } - + TDigestState state = getExistingOrNewHistogram(bigArrays, bucket); if (values.advanceExact(doc)) { final int valueCount = values.docValueCount(); for (int i = 0; i < valueCount; i++) { @@ -115,13 +108,7 @@ private LeafBucketCollector collectHistogramValues(final HistogramValues values, return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long bucket) throws IOException { - states = bigArrays.grow(states, bucket + 1); - TDigestState state = states.get(bucket); - if (state == null) { - state = new TDigestState(compression); - states.set(bucket, state); - } - + TDigestState state = getExistingOrNewHistogram(bigArrays, bucket); if (values.advanceExact(doc)) { final HistogramValue sketch = values.histogram(); while(sketch.next()) { @@ -132,6 +119,16 @@ public void collect(int doc, long bucket) throws IOException { }; } + private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) { + states = bigArrays.grow(states, bucket + 1); + TDigestState state = states.get(bucket); + if (state == null) { + state = new TDigestState(compression); + states.set(bucket, state); + } + return state; + } + @Override public boolean hasMetric(String name) { return indexOfKey(keys, Double.parseDouble(name)) >= 0; From bd59238678eb0fc846581bf854db50be79417144 Mon Sep 17 00:00:00 2001 From: iverase 
Date: Tue, 29 Oct 2019 10:17:36 +0100 Subject: [PATCH 09/24] remove unused imports --- .../HDRPreAggregatedPercentileRanksAggregatorTests.java | 4 ---- .../mapper/HDRPreAggregatedPercentilesAggregatorTests.java | 3 --- 2 files changed, 7 deletions(-) diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java index 3e5f8e8986e1f..702b073264658 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java @@ -27,10 +27,6 @@ import java.io.IOException; import java.util.Iterator; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; - public class HDRPreAggregatedPercentileRanksAggregatorTests extends AggregatorTestCase { diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java index 2ce66ff358125..5905fb16691b4 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java @@ -28,10 +28,7 @@ import java.io.IOException; import java.util.Iterator; -import java.util.List; import java.util.function.Consumer; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; import static java.util.Collections.singleton; From 71886a86afa84f872089aa945c6756c8294cfce2 Mon Sep 
17 00:00:00 2001 From: iverase Date: Tue, 29 Oct 2019 10:19:07 +0100 Subject: [PATCH 10/24] rename test class --- ...ationTests.java => HistogramPercentileAggregationTests.java} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/{HistogramAggregationTests.java => HistogramPercentileAggregationTests.java} (99%) diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java similarity index 99% rename from x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java rename to x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java index af1d7266e9f0c..d1f832c3c09b8 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramAggregationTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java @@ -36,7 +36,7 @@ import java.util.List; -public class HistogramAggregationTests extends ESSingleNodeTestCase { +public class HistogramPercentileAggregationTests extends ESSingleNodeTestCase { public void testHDRHistogram() throws Exception { From 793a2572b189256f34205df5ca799fb9ad3121ee Mon Sep 17 00:00:00 2001 From: iverase Date: Tue, 29 Oct 2019 10:27:27 +0100 Subject: [PATCH 11/24] Detect in the constructor if we expect histogram value source and use that to decide the collecting method --- .../metrics/AbstractHDRPercentilesAggregator.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java 
b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java index f80904443663c..1db86a308a4c3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java @@ -53,12 +53,14 @@ private static int indexOfKey(double[] keys, double key) { protected ObjectArray states; protected final int numberOfSignificantValueDigits; protected final boolean keyed; + protected final boolean isHistogramValueSource; AbstractHDRPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] keys, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, List pipelineAggregators, Map metaData) throws IOException { super(name, context, parent, pipelineAggregators, metaData); this.valuesSource = valuesSource; + this.isHistogramValueSource = valuesSource instanceof ValuesSource.Histogram; this.keyed = keyed; this.format = formatter; this.states = context.bigArrays().newObjectArray(1); @@ -78,7 +80,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, return LeafBucketCollector.NO_OP_COLLECTOR; } final BigArrays bigArrays = context.bigArrays(); - if (valuesSource instanceof ValuesSource.Histogram) { + if (isHistogramValueSource) { final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx); return collectHistogramValues(values, bigArrays, sub); } else { @@ -137,7 +139,6 @@ private DoubleHistogram getExistingOrNewHistogram(final BigArrays bigArrays, lon return state; } - @Override public boolean hasMetric(String name) { return indexOfKey(keys, Double.parseDouble(name)) >= 0; From 579c05c1f00d90b759f8c93d101c34cb8b618265 Mon Sep 17 00:00:00 2001 From: iverase Date: Tue, 29 Oct 2019 10:46:23 +0100 Subject: [PATCH 12/24] revert last change --- 
.../metrics/AbstractHDRPercentilesAggregator.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java index 1db86a308a4c3..1a3706b07056e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java @@ -53,14 +53,12 @@ private static int indexOfKey(double[] keys, double key) { protected ObjectArray states; protected final int numberOfSignificantValueDigits; protected final boolean keyed; - protected final boolean isHistogramValueSource; AbstractHDRPercentilesAggregator(String name, ValuesSource valuesSource, SearchContext context, Aggregator parent, double[] keys, int numberOfSignificantValueDigits, boolean keyed, DocValueFormat formatter, List pipelineAggregators, Map metaData) throws IOException { super(name, context, parent, pipelineAggregators, metaData); this.valuesSource = valuesSource; - this.isHistogramValueSource = valuesSource instanceof ValuesSource.Histogram; this.keyed = keyed; this.format = formatter; this.states = context.bigArrays().newObjectArray(1); @@ -80,7 +78,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, return LeafBucketCollector.NO_OP_COLLECTOR; } final BigArrays bigArrays = context.bigArrays(); - if (isHistogramValueSource) { + if (valuesSource instanceof ValuesSource.Histogram) { final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx); return collectHistogramValues(values, bigArrays, sub); } else { From af1249f0efae529e5c227af41fe62396001f15da Mon Sep 17 00:00:00 2001 From: iverase Date: Thu, 31 Oct 2019 09:30:21 +0100 Subject: [PATCH 13/24] Values must be provided in increasing order --- 
.../mapper/HistogramFieldMapper.java | 11 +++++++++- .../mapper/HistogramFieldMapperTests.java | 22 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index 763792deacbd6..0e8323d70cb25 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -368,10 +368,19 @@ public void parse(ParseContext context) { ensureExpectedToken(XContentParser.Token.START_ARRAY, token, context.parser()::getTokenLocation); values = new DoubleArrayList(); token = context.parser().nextToken(); + double previousVal = -Double.MAX_VALUE; while (token != XContentParser.Token.END_ARRAY) { // should be a number ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, context.parser()::getTokenLocation); - values.add(context.parser().doubleValue()); + double val = context.parser().doubleValue(); + if (val < previousVal) { + // values must be in increasing order + throw new MapperParsingException("error parsing field [" + + name() + "], ["+ COUNTS_FIELD + "] values must be in increasing order, got [" + val + + "] but previous value was [" + previousVal +"]"); + } + values.add(val); + previousVal = val; token = context.parser().nextToken(); } } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) { diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java index cf9af5dafb7c3..0734fc1648ecc 100644 --- 
a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -288,6 +288,28 @@ public void testCountIsLong() throws Exception { assertThat(e.getCause().getMessage(), containsString(" out of range of int")); } + public void testValuesNotInOrder() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + SourceToParse source = new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("counts", new long[] {2, 8, 4}) + .field("values", new double[] {2 ,3 ,2}) + .endObject() + .endObject()), + XContentType.JSON); + + Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); + assertThat(e.getCause().getMessage(), containsString(" values must be in increasing order, " + + "got [2.0] but previous value was [3.0]")); + } + public void testFieldNotObject() throws Exception { ensureGreen(); XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") From 1cb8f538f507c723a24f022c979e481b12c69e54 Mon Sep 17 00:00:00 2001 From: iverase Date: Thu, 31 Oct 2019 09:50:28 +0100 Subject: [PATCH 14/24] Handling null value and do not fail if arrays are empty, trate it as a null value --- .../mapper/HistogramFieldMapper.java | 8 +++++-- .../mapper/HistogramFieldMapperTests.java | 24 +++++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) 
diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index 0e8323d70cb25..cfbc01433fcbf 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -355,6 +355,10 @@ public void parse(ParseContext context) { DoubleArrayList values = null; IntArrayList counts = null; XContentParser.Token token = context.parser().currentToken(); + if (token == XContentParser.Token.VALUE_NULL) { + context.path().remove(); + return; + } // should be an object ensureExpectedToken(XContentParser.Token.START_OBJECT, token, context.parser()::getTokenLocation); token = context.parser().nextToken(); @@ -415,8 +419,8 @@ public void parse(ParseContext context) { "[" + COUNTS_FIELD.getPreferredName() +"] but got [" + values.size() + " != " + counts.size() +"]"); } if (values.size() == 0) { - throw new MapperParsingException("error parsing field [" - + name() + "], arrays for values and counts cannot be empty"); + context.path().remove(); + return; } if (fieldType().hasDocValues()) { BytesStreamOutput streamOutput = new BytesStreamOutput(); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java index 0734fc1648ecc..347b6bc053a3a 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -87,17 +87,33 @@ public void testEmptyArrays() throws Exception { DocumentMapper defaultMapper = 
createIndex("test").mapperService().documentMapperParser() .parse("_doc", new CompressedXContent(mapping)); - SourceToParse source = new SourceToParse("test", "1", + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", BytesReference.bytes(XContentFactory.jsonBuilder() .startObject().field("pre_aggregated").startObject() .field("values", new double[] {}) .field("counts", new int[] {}) .endObject() .endObject()), - XContentType.JSON); + XContentType.JSON)); - Exception e = expectThrows(MapperParsingException.class, () -> defaultMapper.parse(source)); - assertThat(e.getCause().getMessage(), containsString("arrays for values and counts cannot be empty")); + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + } + + public void testNullValue() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().nullField("pre_aggregated") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); } public void testMissingFieldCounts() throws Exception { From 93229e50c2d144dc77caaa4cb7d312677019fec0 Mon Sep 17 00:00:00 2001 From: iverase Date: Thu, 31 Oct 2019 09:58:40 +0100 Subject: [PATCH 15/24] Handle ignore malformed properly --- .../mapper/HistogramFieldMapper.java | 8 +++----- .../mapper/HistogramFieldMapperTests.java | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git 
a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index cfbc01433fcbf..801e90d8c9334 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -443,13 +443,11 @@ public void parse(ParseContext context) { } } catch (Exception ex) { - if (ignoreMalformed.value()) { - return; + if (ignoreMalformed.value() == false) { + throw new MapperParsingException("failed to parse field [{}] of type [{}]", + ex, fieldType().name(), fieldType().typeName()); } - throw new MapperParsingException("failed to parse field [{}] of type [{}]", - ex, fieldType().name(), fieldType().typeName()); } - context.path().remove(); } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java index 347b6bc053a3a..edc4e49c6e971 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -136,6 +136,26 @@ public void testMissingFieldCounts() throws Exception { assertThat(e.getCause().getMessage(), containsString("expected field called [counts]")); } + public void testIgnoreMalformed() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true); + String mapping = 
Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("values", new double[] {2, 2}) + .endObject() + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + } + public void testMissingFieldValues() throws Exception { ensureGreen(); XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") From edec44802904940fe5220c10464a0795c2674179 Mon Sep 17 00:00:00 2001 From: iverase Date: Thu, 31 Oct 2019 16:05:25 +0100 Subject: [PATCH 16/24] initial documentation for the new field --- .../metrics/percentile-aggregation.asciidoc | 4 +- .../percentile-rank-aggregation.asciidoc | 4 +- docs/reference/mapping/types.asciidoc | 3 + .../mapping/types/histogram.asciidoc | 89 +++++++++++++++++++ 4 files changed, 96 insertions(+), 4 deletions(-) create mode 100644 docs/reference/mapping/types/histogram.asciidoc diff --git a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc index 4e5f330a35c1f..920cbdaa27ffc 100644 --- a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc @@ -3,8 +3,8 @@ A `multi-value` metrics aggregation that calculates one or more percentiles over numeric values extracted from the aggregated documents. These values -can be extracted either from specific numeric fields in the documents, or -be generated by a provided script. 
+can be extracted either from specific numeric or histogram fields in the documents, +or be generated by a provided script. Percentiles show the point at which a certain percentage of observed values occur. For example, the 95th percentile is the value which is greater than 95% diff --git a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc index ba05bdb902990..c43ed2cfa2312 100644 --- a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc @@ -3,8 +3,8 @@ A `multi-value` metrics aggregation that calculates one or more percentile ranks over numeric values extracted from the aggregated documents. These values -can be extracted either from specific numeric fields in the documents, or -be generated by a provided script. +can be extracted either from specific numeric fields or histogram in the documents, +or be generated by a provided script. [NOTE] ================================================== diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index 8cd9e0f44f92c..ea2f480a86735 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -32,6 +32,7 @@ string:: <> and <> <>:: `ip` for IPv4 and IPv6 addresses <>:: `completion` to provide auto-complete suggestions +<>:: Record pre-aggregated numerical values to be used in percentiles aggregations. 
<>:: `token_count` to count the number of tokens in a string {plugins}/mapper-murmur3.html[`mapper-murmur3`]:: `murmur3` to compute hashes of values at index-time and store them in the index {plugins}/mapper-annotated-text.html[`mapper-annotated-text`]:: `annotated-text` to index text containing special markup (typically used for identifying named entities) @@ -91,6 +92,8 @@ include::types/date_nanos.asciidoc[] include::types/dense-vector.asciidoc[] +include::types/histogram.asciidoc[] + include::types/flattened.asciidoc[] include::types/geo-point.asciidoc[] diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc new file mode 100644 index 0000000000000..76b3330d8707f --- /dev/null +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -0,0 +1,89 @@ +[role="xpack"] +[testenv="basic"] +[[histogram]] +=== Histogram datatype +++++ +Histogram +++++ + +A `histogram` field stores pre-aggregated numerical data represented as a histogram. These histograms +are defined by a set of pairs, one being the value representing the bucket and the count of that value. +THe values should be defined accordingly to the type of aggregation this field will be used for. + +These histograms cannot be used for searching except with <>. +It can be used for aggregations, in particular <> +and <> aggregations are supported. + +You index a histogram by defining two arrays, one containing the `values` for the histogram and the other containing +the `counts` for such values. Those two arrays must have the same length. 
+ +[source,console] +-------------------------------------------------- +PUT my_index +{ + "mappings": { + "properties": { + "my_histogram": { + "type": "histogram", + }, + "my_text" : { + "type" : "keyword" + } + } + } +} + +PUT my_index/_doc/1 +{ + "my_text" : "histogram_1", + "my_histogram" : { + "values" : [0.1, 0.2, 0.3, 0.4, 0.5], <1> + "counts" : [3, 7, 23, 12, 6] <2> + } +} + +PUT my_index/_doc/2 +{ + "my_text" : "histogram_2", + "my_histogram" : { + "values" : [0.1, 0.25, 0.35, 0.4, 0.45, 0.5], <1> + "counts" : [8, 17, 8, 7, 6, 2] <2> + } +} + +-------------------------------------------------- + +<1> Values for each bucket. Values in the array are treated as doubles and must be given in +increasing order. In case if TDDigest histograms this value represents the mean value. +In case of HDR histograms this represents the value iterated to. +<2> Count for each bucket. Values in the arrays are treated as integers and must be positive or zero. +negative values will be rejected. The relation between a bucket and a count is given by the position in the array. + +NOTE: `histogram` fields only support single-valued fields. Multi-valued fields +will be rejected. + +NOTE: `histogram` fields with null field are ignored. Fields with empty `values` and `counts` +arrays are treated as null. + +NOTE: `rank_feature` fields do not support querying or sorting. They may +only be used within <> queries. + +Internally, each document's histogram is encoded as a binary +doc value. Its size in bytes is at most `12 * numValues`, +where `numValues` is the length of the provided arrays. + +[[mapping-types-histogram-building-histogram]] +==== Building a histogram + +When using a histogram as part of an aggregation, the accuracy of the results will depend on how the +histogram was constructed. It is important to consider the percentiles aggregation mode that will be used +in order to build it. 
Some possibilities are: + +- For the <> mode, histograms +can be built by using the mean value of the centroids and the centroid's count. If the algorithm has already +started to approximate the percentiles, this inaccuracy is carried over in the histogram. + +- For the https://github.com/HdrHistogram/HdrHistogram[HDR Histogram] (High Dynamic Range Histogram) method, histograms +can be created by using the recorded values and the count at that value. This implementation maintains a fixed worse-case +percentage error (specified as a number of significant digits), therefore the value used when generating the histogram +would be the maximum accuracy you can achieve at aggregation time. From adf12a44a53788e607c33f0ac904eb0157e89bf1 Mon Sep 17 00:00:00 2001 From: iverase Date: Thu, 31 Oct 2019 17:44:02 +0100 Subject: [PATCH 17/24] initial documentation for the new field --- docs/reference/mapping/types/histogram.asciidoc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc index 76b3330d8707f..d76b76abc94f7 100644 --- a/docs/reference/mapping/types/histogram.asciidoc +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -10,11 +10,10 @@ A `histogram` field stores pre-aggregated numerical data represented as a histog are defined by a set of pairs, one being the value representing the bucket and the count of that value. THe values should be defined accordingly to the type of aggregation this field will be used for. -These histograms cannot be used for searching except with <>. -It can be used for aggregations, in particular <> +Histograms can be used for aggregations, in particular <> and <> aggregations are supported. 
-You index a histogram by defining two arrays, one containing the `values` for the histogram and the other containing +Histograms are indexed by defining two arrays, one containing the `values` (or buckets) for the histogram and the other containing the `counts` for such values. Those two arrays must have the same length. [source,console] @@ -50,7 +49,6 @@ PUT my_index/_doc/2 "counts" : [8, 17, 8, 7, 6, 2] <2> } } - -------------------------------------------------- <1> Values for each bucket. Values in the array are treated as doubles and must be given in @@ -66,7 +64,7 @@ NOTE: `histogram` fields with null field are ignored. Fields with empty `values` arrays are treated as null. NOTE: `rank_feature` fields do not support querying or sorting. They may -only be used within <> queries. +only be used within <> queries. Internally, each document's histogram is encoded as a binary doc value. Its size in bytes is at most `12 * numValues`, From 3c5892edff4ea3746b827c84683e0d138bfd6943 Mon Sep 17 00:00:00 2001 From: iverase Date: Fri, 1 Nov 2019 10:43:01 +0100 Subject: [PATCH 18/24] Addressed docs review --- .../metrics/percentile-aggregation.asciidoc | 6 +- .../percentile-rank-aggregation.asciidoc | 6 +- docs/reference/mapping/types.asciidoc | 4 +- .../mapping/types/histogram.asciidoc | 106 +++++++++++------- 4 files changed, 77 insertions(+), 45 deletions(-) diff --git a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc index 920cbdaa27ffc..f70a2c0761421 100644 --- a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc @@ -2,9 +2,9 @@ === Percentiles Aggregation A `multi-value` metrics aggregation that calculates one or more percentiles -over numeric values extracted from the aggregated documents. 
These values -can be extracted either from specific numeric or histogram fields in the documents, -or be generated by a provided script. +over numeric values extracted from the aggregated documents. These values can be +generated by a provided script or extracted from specific numeric or histogram +fields in the documents. Percentiles show the point at which a certain percentage of observed values occur. For example, the 95th percentile is the value which is greater than 95% diff --git a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc index c43ed2cfa2312..4c4d048f25a25 100644 --- a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc @@ -2,9 +2,9 @@ === Percentile Ranks Aggregation A `multi-value` metrics aggregation that calculates one or more percentile ranks -over numeric values extracted from the aggregated documents. These values -can be extracted either from specific numeric fields or histogram in the documents, -or be generated by a provided script. +over numeric values extracted from the aggregated documents. These values can be +generated by a provided script or extracted from specific numeric or histogram +fields in the documents. [NOTE] ================================================== diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index ea2f480a86735..89898c27ccb04 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -32,7 +32,7 @@ string:: <> and <> <>:: `ip` for IPv4 and IPv6 addresses <>:: `completion` to provide auto-complete suggestions -<>:: Record pre-aggregated numerical values to be used in percentiles aggregations. 
+ <>:: `token_count` to count the number of tokens in a string {plugins}/mapper-murmur3.html[`mapper-murmur3`]:: `murmur3` to compute hashes of values at index-time and store them in the index {plugins}/mapper-annotated-text.html[`mapper-annotated-text`]:: `annotated-text` to index text containing special markup (typically used for identifying named entities) @@ -57,6 +57,8 @@ string:: <> and <> <>:: `shape` for arbitrary cartesian geometries. +<>:: `histogram` for pre-aggregated numerical values for percentiles aggregations. + [float] [[types-array-handling]] === Arrays diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc index d76b76abc94f7..0c62cd919cc06 100644 --- a/docs/reference/mapping/types/histogram.asciidoc +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -6,16 +6,67 @@ Histogram ++++ -A `histogram` field stores pre-aggregated numerical data represented as a histogram. These histograms -are defined by a set of pairs, one being the value representing the bucket and the count of that value. -THe values should be defined accordingly to the type of aggregation this field will be used for. +A field to store pre-aggregated numerical data representing a histogram. +This data is defined using two paired arrays: -Histograms can be used for aggregations, in particular <> -and <> aggregations are supported. +* A `values` array of <> numbers, representing the buckets for +the histogram. These values must be provided in ascending order. +* A corresponding `counts` array of <> numbers, representing how +many values fall into each bucket. These numbers must be positive or zero. -Histograms are indexed by defining two arrays, one containing the `values` (or buckets) for the histogram and the other containing -the `counts` for such values. Those two arrays must have the same length. 
+Because the elements in the `values` array correspond to the elements in the +same position of the `counts` array, these two arrays must have the same length. +[IMPORTANT] +======== +* A `histogram` field can only store a single pair of `values` and `count` arrays +per document. Nested arrays are not supported. +* A `histogram` field with empty `values` and `counts` arrays is treated as `null`. +* `histogram` fields do not support sorting. +======== + +[[histogram-uses]] +==== Uses + +`histogram` fields are primarily intended for use with aggregations. To make it +more readily accessible for aggregations, `histogram` field data is stored as a +binary <> and not indexed. Its size in bytes is at most +`12 * numValues`, where `numValues` is the length of the provided arrays. + +Because the data is not indexed, you can only use `histogram` fields for the +following aggregations and queries: + +* <> aggregation +* <> aggregation +* <> query + +We recommend you define the buckets in the `values` array based on the type of aggregation you intend to use. + +[[mapping-types-histogram-building-histogram]] +==== Building a histogram + +When using a histogram as part of an aggregation, the accuracy of the results will depend on how the +histogram was constructed. It is important to consider the percentiles aggregation mode that will be used +to build it. Some possibilities include: + +- For the <> mode, histograms +can be built by using the mean value of the centroids and the centroid's count. If the algorithm has already +started to approximate the percentiles, this inaccuracy is carried over in the histogram. + +- For the <<_hdr_histogram,High Dynamic Range (HDR)>> histogram mode, histograms +can be created by using the recorded values and the count at that value.
This implementation maintains a fixed worst-case +percentage error (specified as a number of significant digits), therefore the value used when generating the histogram +would be the maximum accuracy you can achieve at aggregation time. + +[[histogram-ex]] +==== Examples + +The following <> API request creates a new index with two field mappings: + +* `my_histogram`, a `histogram` field used to store percentile data +* `my_text`, a `keyword` field used to store a title for the histogram + +[ INSERT CREATE INDEX SNIPPET ] [source,console] -------------------------------------------------- PUT my_index @@ -23,7 +74,7 @@ PUT my_index "mappings": { "properties": { "my_histogram": { - "type": "histogram", + "type" : "histogram" }, "my_text" : { "type" : "keyword" @@ -31,7 +82,13 @@ PUT my_index } } } +-------------------------------------------------- + +The following <> API requests store pre-aggregated data for +two histograms: `histogram_1` and `histogram_2`. +[source,console] +-------------------------------------------------- PUT my_index/_doc/1 { "my_text" : "histogram_1", @@ -50,38 +107,11 @@ PUT my_index/_doc/2 } } -------------------------------------------------- - <1> Values for each bucket. Values in the array are treated as doubles and must be given in -increasing order. For <> +histograms this value represents the mean value. In case of HDR histograms this represents the value iterated to. <2> Count for each bucket. Values in the arrays are treated as integers and must be positive or zero. -negative values will be rejected. The relation between a bucket and a count is given by the position in the array. +Negative values will be rejected. The relation between a bucket and a count is given by the position in the array. -NOTE: `histogram` fields only support single-valued fields.
Multi-valued fields -will be rejected. -NOTE: `histogram` fields with null field are ignored. Fields with empty `values` and `counts` -arrays are treated as null. - -NOTE: `rank_feature` fields do not support querying or sorting. They may -only be used within <> queries. - -Internally, each document's histogram is encoded as a binary -doc value. Its size in bytes is at most `12 * numValues`, -where `numValues` is the length of the provided arrays. - -[[mapping-types-histogram-building-histogram]] -==== Building a histogram -When using a histogram as part of an aggregation, the accuracy of the results will depend on how the -histogram was constructed. It is important to consider the percentiles aggregation mode that will be used -in order to build it. Some possibilities are: - -- For the <> mode, histograms -can be built by using the mean value of the centroids and the centroid's count. If the algorithm has already -started to approximate the percentiles, this inaccuracy is carried over in the histogram. - -- For the https://github.com/HdrHistogram/HdrHistogram[HDR Histogram] (High Dynamic Range Histogram) method, histograms -can be created by using the recorded values and the count at that value. This implementation maintains a fixed worse-case -percentage error (specified as a number of significant digits), therefore the value used when generating the histogram -would be the maximum accuracy you can achieve at aggregation time. 
From 19f15a213e18d3ab288a80c77dc7690e9d40bb60 Mon Sep 17 00:00:00 2001 From: iverase Date: Fri, 1 Nov 2019 13:31:35 +0100 Subject: [PATCH 19/24] Add HistogramFieldTypeTests --- .../mapper/HistogramFieldMapper.java | 11 ++++++----- .../mapper/HistogramFieldTypeTests.java | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldTypeTests.java diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index 801e90d8c9334..b525584dac491 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -69,7 +69,7 @@ public static class Names { public static class Defaults { public static final Explicit IGNORE_MALFORMED = new Explicit<>(false, false); - public static final HDRPercentilesFieldType FIELD_TYPE = new HDRPercentilesFieldType(); + public static final HistogramFieldType FIELD_TYPE = new HistogramFieldType(); static { FIELD_TYPE.setTokenized(false); @@ -193,11 +193,12 @@ protected void parseCreateField(ParseContext context, List field throw new UnsupportedOperationException("Parsing is implemented in parse(), this method should NEVER be called"); } - public static class HDRPercentilesFieldType extends MappedFieldType { - public HDRPercentilesFieldType() { + public static class HistogramFieldType extends MappedFieldType { + + public HistogramFieldType() { } - HDRPercentilesFieldType(HDRPercentilesFieldType ref) { + HistogramFieldType(HistogramFieldType ref) { super(ref); } @@ -208,7 +209,7 @@ public String typeName() { @Override public MappedFieldType clone() { - return new HDRPercentilesFieldType(this); + 
return new HistogramFieldType(this); } @Override diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldTypeTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldTypeTests.java new file mode 100644 index 0000000000000..82ef60fe0ccb5 --- /dev/null +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldTypeTests.java @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + + +package org.elasticsearch.xpack.analytics.mapper; + +import org.elasticsearch.index.mapper.FieldTypeTestCase; +import org.elasticsearch.index.mapper.MappedFieldType; + +public class HistogramFieldTypeTests extends FieldTypeTestCase { + + @Override + protected MappedFieldType createDefaultFieldType() { + return new HistogramFieldMapper.HistogramFieldType(); + } +} From 1f6383d5bdccb74cb4ec7a8cdd21fe2bfee8be83 Mon Sep 17 00:00:00 2001 From: iverase Date: Sun, 3 Nov 2019 12:23:03 +0100 Subject: [PATCH 20/24] address last review comments --- .../mapping/types/histogram.asciidoc | 1 - .../mapper/HistogramFieldMapper.java | 19 +++++------ ...eAggregatedPercentilesAggregatorTests.java | 33 ++++++++++++------- .../mapper/HistogramFieldMapperTests.java | 16 ++++----- ...eAggregatedPercentilesAggregatorTests.java | 24 +++++++++----- 5 files changed, 55 insertions(+), 38 deletions(-) diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc index 0c62cd919cc06..db5aebd3cb52c 100644 --- a/docs/reference/mapping/types/histogram.asciidoc +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -21,7 +21,6 @@ same position of the `count` array, these two arrays must have the same length. 
======== * A `histogram` field can only store a single pair of `values` and `count` arrays per document. Nested arrays are not supported. -* A `histogram` field with empty `values` and `counts` arrays is treated as `null`. * `histogram` fields do not support sorting. ======== diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index b525584dac491..d3c95d185d069 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -58,7 +58,6 @@ /** * Field Mapper for pre-aggregated histograms. - * */ public class HistogramFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "histogram"; @@ -353,13 +352,13 @@ public void parse(ParseContext context) { } context.path().add(simpleName()); try { - DoubleArrayList values = null; - IntArrayList counts = null; XContentParser.Token token = context.parser().currentToken(); if (token == XContentParser.Token.VALUE_NULL) { context.path().remove(); return; } + DoubleArrayList values = null; + IntArrayList counts = null; // should be an object ensureExpectedToken(XContentParser.Token.START_OBJECT, token, context.parser()::getTokenLocation); token = context.parser().nextToken(); @@ -419,19 +418,18 @@ public void parse(ParseContext context) { + name() + "], expected same length from [" + VALUES_FIELD.getPreferredName() +"] and " + "[" + COUNTS_FIELD.getPreferredName() +"] but got [" + values.size() + " != " + counts.size() +"]"); } - if (values.size() == 0) { - context.path().remove(); - return; - } if (fieldType().hasDocValues()) { BytesStreamOutput streamOutput = new BytesStreamOutput(); for (int i = 0; i < values.size(); i++) { - streamOutput.writeDouble(values.get(i)); - if 
(counts.get(i) < 0) { + int count = counts.get(i); + if (count < 0) { throw new MapperParsingException("error parsing field [" + name() + "], ["+ COUNTS_FIELD + "] elements must be >= 0 but got " + counts.get(i)); + } else if (count > 0) { + // we do not add elements with count == 0 + streamOutput.writeDouble(values.get(i)); + streamOutput.writeVInt(count); } - streamOutput.writeVInt(counts.get(i)); } Field field = new BinaryDocValuesField(simpleName(), streamOutput.bytes().toBytesRef()); @@ -448,6 +446,7 @@ public void parse(ParseContext context) { throw new MapperParsingException("failed to parse field [{}] of type [{}]", ex, fieldType().name(), fieldType().typeName()); } + context.addIgnoredField(fieldType().name()); } context.path().remove(); } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java index 5905fb16691b4..4f259c2b39b45 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java @@ -34,20 +34,23 @@ public class HDRPreAggregatedPercentilesAggregatorTests extends AggregatorTestCase { - private BinaryDocValuesField getDocValue(String fieldName, int[] values) throws IOException { + private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException { DoubleHistogram histogram = new DoubleHistogram(3);//default - for (int value : values) { + for (double value : values) { histogram.recordValue(value); } BytesStreamOutput streamOutput = new BytesStreamOutput(); DoubleHistogram.RecordedValues recordedValues = histogram.recordedValues(); Iterator iterator = recordedValues.iterator(); while (iterator.hasNext()) { 
+ DoubleHistogramIterationValue value = iterator.next(); - double d = value.getValueIteratedTo(); - streamOutput.writeDouble(d); long count = value.getCountAtValueIteratedTo(); - streamOutput.writeVInt(Math.toIntExact(count)); + if (count != 0) { + double d = value.getValueIteratedTo(); + streamOutput.writeDouble(d); + streamOutput.writeVInt(Math.toIntExact(count)); + } } return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); @@ -55,16 +58,24 @@ private BinaryDocValuesField getDocValue(String fieldName, int[] values) throws public void testNoMatchingField() throws IOException { testCase(new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(getDocValue("wrong_number", new int[]{7, 1}))); + iw.addDocument(singleton(getDocValue("wrong_number", new double[]{7, 1}))); }, hdr -> { //assertEquals(0L, hdr.state.getTotalCount()); assertFalse(AggregationInspectionHelper.hasValue(hdr)); }); } + public void testEmptyField() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(getDocValue("number", new double[0]))); + }, hdr -> { + assertFalse(AggregationInspectionHelper.hasValue(hdr)); + }); + } + public void testSomeMatchesBinaryDocValues() throws IOException { testCase(new DocValuesFieldExistsQuery("number"), iw -> { - iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); }, hdr -> { //assertEquals(4L, hdr.state.getTotalCount()); double approximation = 0.05d; @@ -78,10 +89,10 @@ public void testSomeMatchesBinaryDocValues() throws IOException { public void testSomeMatchesMultiBinaryDocValues() throws IOException { testCase(new DocValuesFieldExistsQuery("number"), iw -> { - iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); - iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); - iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); - 
iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); }, hdr -> { //assertEquals(16L, hdr.state.getTotalCount()); double approximation = 0.05d; diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java index edc4e49c6e971..b1982ab21b3d0 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -43,8 +43,8 @@ public void testParseValue() throws Exception { ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", BytesReference.bytes(XContentFactory.jsonBuilder() .startObject().field("pre_aggregated").startObject() - .field("values", new int[] {0, 0}) - .field("counts", new long[] {0, 0}) + .field("values", new double[] {2, 3}) + .field("counts", new int[] {0, 4}) .endObject() .endObject()), XContentType.JSON)); @@ -96,7 +96,7 @@ public void testEmptyArrays() throws Exception { .endObject()), XContentType.JSON)); - assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + assertThat(doc.rootDoc().getField("pre_aggregated"), notNullValue()); } public void testNullValue() throws Exception { @@ -335,7 +335,7 @@ public void testValuesNotInOrder() throws Exception { SourceToParse source = new SourceToParse("test", "1", BytesReference.bytes(XContentFactory.jsonBuilder() .startObject().field("pre_aggregated").startObject() - .field("counts", new 
long[] {2, 8, 4}) + .field("counts", new int[] {2, 8, 4}) .field("values", new double[] {2 ,3 ,2}) .endObject() .endObject()), @@ -443,8 +443,8 @@ public void testSetDocValuesField() throws Exception { ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", BytesReference.bytes(XContentFactory.jsonBuilder() .startObject().field("pre_aggregated").startObject() - .field("values", new double[] {0, 0}) - .field("counts", new int[] {0, 0}) + .field("values", new double[] {2, 3}) + .field("counts", new int[] {4, 6}) .endObject() .endObject()), XContentType.JSON)); @@ -462,8 +462,8 @@ public void testSetDocValuesField() throws Exception { doc = defaultMapper.parse(new SourceToParse("test", "1", BytesReference.bytes(XContentFactory.jsonBuilder() .startObject().field("pre_aggregated").startObject() - .field("values", new double[] {0, 0}) - .field("counts", new int[] {0, 0}) + .field("values", new double[] {2, 3}) + .field("counts", new int[] {4, 6}) .endObject() .endObject()), XContentType.JSON)); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java index 47be63147791e..dddf900df90ab 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java @@ -36,9 +36,9 @@ public class TDigestPreAggregatedPercentilesAggregatorTests extends AggregatorTestCase { - private BinaryDocValuesField getDocValue(String fieldName, int[] values) throws IOException { + private BinaryDocValuesField getDocValue(String fieldName, double[] values) throws IOException { TDigest histogram = new TDigestState(100.0); //default - for (int value : values) { 
+ for (double value : values) { histogram.add(value); } BytesStreamOutput streamOutput = new BytesStreamOutput(); @@ -55,16 +55,24 @@ private BinaryDocValuesField getDocValue(String fieldName, int[] values) throws public void testNoMatchingField() throws IOException { testCase(new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(getDocValue("wrong_number", new int[]{7, 1}))); + iw.addDocument(singleton(getDocValue("wrong_number", new double[]{7, 1}))); }, hdr -> { //assertEquals(0L, hdr.state.getTotalCount()); assertFalse(AggregationInspectionHelper.hasValue(hdr)); }); } + public void testEmptyField() throws IOException { + testCase(new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(getDocValue("number", new double[0]))); + }, hdr -> { + assertFalse(AggregationInspectionHelper.hasValue(hdr)); + }); + } + public void testSomeMatchesBinaryDocValues() throws IOException { testCase(new DocValuesFieldExistsQuery("number"), iw -> { - iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); }, hdr -> { //assertEquals(4L, hdr.state.getTotalCount()); double approximation = 0.05d; @@ -78,10 +86,10 @@ public void testSomeMatchesBinaryDocValues() throws IOException { public void testSomeMatchesMultiBinaryDocValues() throws IOException { testCase(new DocValuesFieldExistsQuery("number"), iw -> { - iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); - iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); - iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); - iw.addDocument(singleton(getDocValue("number", new int[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); + 
iw.addDocument(singleton(getDocValue("number", new double[]{60, 40, 20, 10}))); }, hdr -> { //assertEquals(16L, hdr.state.getTotalCount()); double approximation = 0.05d; From fbabf1c599ec216ad048846c0850c879974551f6 Mon Sep 17 00:00:00 2001 From: iverase Date: Wed, 27 Nov 2019 13:34:15 +0100 Subject: [PATCH 21/24] Make sure that in ignore malformed we move to the end of the field --- .../mapper/HistogramFieldMapper.java | 25 ++++++- .../mapper/HistogramFieldMapperTests.java | 72 +++++++++++++++++++ 2 files changed, 95 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index d3c95d185d069..ada6d1471cef6 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -346,13 +346,15 @@ public Query termQuery(Object value, QueryShardContext context) { } @Override - public void parse(ParseContext context) { + public void parse(ParseContext context) throws IOException { if (context.externalValueSet()) { throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] can't be used in multi-fields"); } context.path().add(simpleName()); + XContentParser.Token token = null; + int level = 0; try { - XContentParser.Token token = context.parser().currentToken(); + token = context.parser().currentToken(); if (token == XContentParser.Token.VALUE_NULL) { context.path().remove(); return; @@ -404,7 +406,9 @@ public void parse(ParseContext context) { name() + "], with unknown parameter [" + fieldName + "]"); } token = context.parser().nextToken(); + level = maybeAddOrRemoveLevel(token, level); } + level = 0; if (values == null) { throw new MapperParsingException("error parsing field [" + name() 
+ "], expected field called [" + VALUES_FIELD.getPreferredName() + "]"); @@ -446,11 +450,28 @@ public void parse(ParseContext context) { throw new MapperParsingException("failed to parse field [{}] of type [{}]", ex, fieldType().name(), fieldType().typeName()); } + // we need to advance until the end of the field + if (token != null) { + while (level > 0 || token != XContentParser.Token.END_OBJECT) { + level = maybeAddOrRemoveLevel(token, level); + token = context.parser().nextToken(); + } + } context.addIgnoredField(fieldType().name()); } context.path().remove(); } + private int maybeAddOrRemoveLevel(XContentParser.Token token, int level) { + if (token == XContentParser.Token.START_OBJECT) { + return ++level; + } + if (token == XContentParser.Token.END_OBJECT) { + return --level; + } + return level; + } + @Override protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { super.doXContentBody(builder, includeDefaults, params); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java index b1982ab21b3d0..5fe85e5b3df3a 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -156,6 +156,78 @@ public void testIgnoreMalformed() throws Exception { assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); } + public void testIgnoreMalformedSkipsField() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true) + 
.endObject().startObject("otherField").field("type", "keyword"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .field("values", new double[] {2, 2}) + .field("typo", new double[] {2, 2}) + .endObject() + .field("otherField","value") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + assertThat(doc.rootDoc().getField("otherField"), notNullValue()); + } + + public void testIgnoreMalformedSkipsObjects() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true) + .endObject().startObject("otherField").field("type", "keyword"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject() + .startObject("values").field("values", new double[] {2, 2}) + .startObject("otherData").startObject("more").field("toto", 1) + .endObject().endObject() + .endObject() + .field("counts", new double[] {2, 2}) + .endObject() + .field("otherField","value") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + 
assertThat(doc.rootDoc().getField("otherField"), notNullValue()); + } + + public void testIgnoreMalformedSkipsEmpty() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true) + .endObject().startObject("otherField").field("type", "keyword"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated").startObject().endObject() + .field("otherField","value") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + assertThat(doc.rootDoc().getField("otherField"), notNullValue()); + } + public void testMissingFieldValues() throws Exception { ensureGreen(); XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") From f1a1ead58b7ff127d5f1a3e40cea4c65461de90b Mon Sep 17 00:00:00 2001 From: iverase Date: Thu, 28 Nov 2019 09:04:05 +0100 Subject: [PATCH 22/24] address review comments --- .../metrics/percentile-aggregation.asciidoc | 4 +- .../percentile-rank-aggregation.asciidoc | 4 +- .../mapping/types/histogram.asciidoc | 21 ++++--- .../AbstractHDRPercentilesAggregator.java | 2 +- .../mapper/HistogramFieldMapper.java | 59 ++++++++----------- ...regatedPercentileRanksAggregatorTests.java | 5 +- ...eAggregatedPercentilesAggregatorTests.java | 2 +- .../mapper/HistogramFieldMapperTests.java | 42 +++++++++++++ ...regatedPercentileRanksAggregatorTests.java | 2 +- ...eAggregatedPercentilesAggregatorTests.java | 2 +- 10 files changed, 88 
insertions(+), 55 deletions(-) diff --git a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc index f70a2c0761421..9d4476c779997 100644 --- a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc @@ -3,8 +3,8 @@ A `multi-value` metrics aggregation that calculates one or more percentiles over numeric values extracted from the aggregated documents. These values can be -generated by a provided script or extracted from specific numeric or histogram -fields in the documents. +generated by a provided script or extracted from specific numeric or +<> in the documents. Percentiles show the point at which a certain percentage of observed values occur. For example, the 95th percentile is the value which is greater than 95% diff --git a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc index 4c4d048f25a25..d0765ea026846 100644 --- a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc @@ -3,8 +3,8 @@ A `multi-value` metrics aggregation that calculates one or more percentile ranks over numeric values extracted from the aggregated documents. These values can be -generated by a provided script or extracted from specific numeric or histogram -fields in the documents. +generated by a provided script or extracted from specific numeric or +<> in the documents. 
[NOTE] ================================================== diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc index db5aebd3cb52c..b82e07167cb40 100644 --- a/docs/reference/mapping/types/histogram.asciidoc +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -39,8 +39,6 @@ following aggregations and queries: * <> aggregation * <> query -We recommend you define the buckets in the `values` array based on the type of aggregation you intended to use. - [[mapping-types-histogram-building-histogram]] ==== Building a histogram @@ -48,14 +46,19 @@ When using a histogram as part of an aggregation, the accuracy of the results wi histogram was constructed. It is important to consider the percentiles aggregation mode that will be used to build it. Some possibilities include: -- For the <> mode, histograms -can be built by using the mean value of the centroids and the centroid's count. If the algorithm has already -started to approximate the percentiles, this inaccuracy is carried over in the histogram. +- For the <> mode, the `values` array represents +the mean centroid positions and the `counts` array represents the number of values that are attributed to each +centroid. If the algorithm has already started to approximate the percentiles, this inaccuracy is +carried over in the histogram. + +- For the <<_hdr_histogram,High Dynamic Range (HDR)>> histogram mode, the `values` array represents fixed upper +limits of each bucket interval, and the `counts` array represents the number of values that are attributed to each +interval. This implementation maintains a fixed worst-case percentage error (specified as a number of significant digits), +therefore the value used when generating the histogram would be the maximum accuracy you can achieve at aggregation time. -- For the <<_hdr_histogram,High Dynamic Range (HDR)>> histogram mode, histograms -can be created by using the recorded values and the count at that value. 
This implementation maintains a fixed worse-case -percentage error (specified as a number of significant digits), therefore the value used when generating the histogram -would be the maximum accuracy you can achieve at aggregation time. +The histogram field is "algorithm agnostic" and does not store data specific to either T-Digest or HDRHistogram. While this +means the field can technically be aggregated with either algorithm, in practice the user should choose one algorithm and +index data in that manner (e.g. centroids for T-Digest or intervals for HDRHistogram) to ensure best accuracy. [[histogram-ex]] ==== Examples diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java index 1a3706b07056e..5d9e616a39718 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java @@ -112,7 +112,7 @@ public void collect(int doc, long bucket) throws IOException { DoubleHistogram state = getExistingOrNewHistogram(bigArrays, bucket); if (values.advanceExact(doc)) { final HistogramValue sketch = values.histogram(); - while(sketch.next()) { + while (sketch.next()) { state.recordValueWithCount(sketch.value(), sketch.count()); } } diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index ada6d1471cef6..8a1f27f202ad8 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -26,6 +26,7 @@ import 
org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentSubParser; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.fielddata.AtomicHistogramFieldData; @@ -298,8 +299,8 @@ private HistogramValue getHistogramValue(final BytesRef bytesRef) throws IOExcep @Override public boolean next() throws IOException { if (streamInput.available() > 0) { - value = streamInput.readDouble(); count = streamInput.readVInt(); + value = streamInput.readDouble(); return true; } isExhausted = true; @@ -352,7 +353,7 @@ public void parse(ParseContext context) throws IOException { } context.path().add(simpleName()); XContentParser.Token token = null; - int level = 0; + XContentSubParser subParser = null; try { token = context.parser().currentToken(); if (token == XContentParser.Token.VALUE_NULL) { @@ -363,22 +364,23 @@ public void parse(ParseContext context) throws IOException { IntArrayList counts = null; // should be an object ensureExpectedToken(XContentParser.Token.START_OBJECT, token, context.parser()::getTokenLocation); - token = context.parser().nextToken(); + subParser = new XContentSubParser(context.parser()); + token = subParser.nextToken(); while (token != XContentParser.Token.END_OBJECT) { // should be an field - ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, context.parser()::getTokenLocation); - String fieldName = context.parser().currentName(); + ensureExpectedToken(XContentParser.Token.FIELD_NAME, token, subParser::getTokenLocation); + String fieldName = subParser.currentName(); if (fieldName.equals(VALUES_FIELD.getPreferredName())) { - token = context.parser().nextToken(); + token = subParser.nextToken(); // should be an array - ensureExpectedToken(XContentParser.Token.START_ARRAY, token, 
context.parser()::getTokenLocation); + ensureExpectedToken(XContentParser.Token.START_ARRAY, token, subParser::getTokenLocation); values = new DoubleArrayList(); - token = context.parser().nextToken(); + token = subParser.nextToken(); double previousVal = -Double.MAX_VALUE; while (token != XContentParser.Token.END_ARRAY) { // should be a number - ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, context.parser()::getTokenLocation); - double val = context.parser().doubleValue(); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, subParser::getTokenLocation); + double val = subParser.doubleValue(); if (val < previousVal) { // values must be in increasing order throw new MapperParsingException("error parsing field [" @@ -387,28 +389,26 @@ public void parse(ParseContext context) throws IOException { } values.add(val); previousVal = val; - token = context.parser().nextToken(); + token = subParser.nextToken(); } } else if (fieldName.equals(COUNTS_FIELD.getPreferredName())) { - token = context.parser().nextToken(); + token = subParser.nextToken(); // should be an array - ensureExpectedToken(XContentParser.Token.START_ARRAY, token, context.parser()::getTokenLocation); + ensureExpectedToken(XContentParser.Token.START_ARRAY, token, subParser::getTokenLocation); counts = new IntArrayList(); - token = context.parser().nextToken(); + token = subParser.nextToken(); while (token != XContentParser.Token.END_ARRAY) { // should be a number - ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, context.parser()::getTokenLocation); - counts.add(context.parser().intValue()); - token = context.parser().nextToken(); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, subParser::getTokenLocation); + counts.add(subParser.intValue()); + token = subParser.nextToken(); } } else { throw new MapperParsingException("error parsing field [" + name() + "], with unknown parameter [" + fieldName + "]"); } - token = context.parser().nextToken(); - level = 
maybeAddOrRemoveLevel(token, level); + token = subParser.nextToken(); } - level = 0; if (values == null) { throw new MapperParsingException("error parsing field [" + name() + "], expected field called [" + VALUES_FIELD.getPreferredName() + "]"); @@ -431,8 +431,8 @@ public void parse(ParseContext context) throws IOException { + name() + "], ["+ COUNTS_FIELD + "] elements must be >= 0 but got " + counts.get(i)); } else if (count > 0) { // we do not add elements with count == 0 - streamOutput.writeDouble(values.get(i)); streamOutput.writeVInt(count); + streamOutput.writeDouble(values.get(i)); } } @@ -451,10 +451,9 @@ public void parse(ParseContext context) throws IOException { ex, fieldType().name(), fieldType().typeName()); } // we need to advance until the end of the field - if (token != null) { - while (level > 0 || token != XContentParser.Token.END_OBJECT) { - level = maybeAddOrRemoveLevel(token, level); - token = context.parser().nextToken(); + if (subParser != null) { + while (token != null) { + token = subParser.nextToken(); } } context.addIgnoredField(fieldType().name()); @@ -462,16 +461,6 @@ public void parse(ParseContext context) throws IOException { context.path().remove(); } - private int maybeAddOrRemoveLevel(XContentParser.Token token, int level) { - if (token == XContentParser.Token.START_OBJECT) { - return ++level; - } - if (token == XContentParser.Token.END_OBJECT) { - return --level; - } - return level; - } - @Override protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { super.doXContentBody(builder, includeDefaults, params); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java index 702b073264658..843ff9447fde5 100644 --- 
a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentileRanksAggregatorTests.java @@ -40,11 +40,10 @@ private BinaryDocValuesField getDocValue(String fieldName, double[] values) thro Iterator iterator = recordedValues.iterator(); while (iterator.hasNext()) { DoubleHistogramIterationValue value = iterator.next(); - double d = value.getValueIteratedTo(); - streamOutput.writeDouble(d); long count = value.getCountAtValueIteratedTo(); streamOutput.writeVInt(Math.toIntExact(count)); - + double d = value.getValueIteratedTo(); + streamOutput.writeDouble(d); } return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java index 4f259c2b39b45..5d4e5c05b2e9d 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HDRPreAggregatedPercentilesAggregatorTests.java @@ -47,9 +47,9 @@ private BinaryDocValuesField getDocValue(String fieldName, double[] values) thro DoubleHistogramIterationValue value = iterator.next(); long count = value.getCountAtValueIteratedTo(); if (count != 0) { + streamOutput.writeVInt(Math.toIntExact(count)); double d = value.getValueIteratedTo(); streamOutput.writeDouble(d); - streamOutput.writeVInt(Math.toIntExact(count)); } } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java 
b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java index 5fe85e5b3df3a..3f298c40b7296 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -156,6 +156,48 @@ public void testIgnoreMalformed() throws Exception { assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); } + public void testIgnoreMalformedSkipsKeyword() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true) + .endObject().startObject("otherField").field("type", "keyword"); + String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated", "value") + .field("otherField","value") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + assertThat(doc.rootDoc().getField("otherField"), notNullValue()); + } + + public void testIgnoreMalformedSkipsArray() throws Exception { + ensureGreen(); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") + .startObject("properties").startObject("pre_aggregated").field("type", "histogram") + .field("ignore_malformed", true) + .endObject().startObject("otherField").field("type", "keyword"); + String mapping = 
Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); + DocumentMapper defaultMapper = createIndex("test").mapperService().documentMapperParser() + .parse("_doc", new CompressedXContent(mapping)); + + ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(XContentFactory.jsonBuilder() + .startObject().field("pre_aggregated", new int[] {2, 2, 2}) + .field("otherField","value") + .endObject()), + XContentType.JSON)); + + assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); + assertThat(doc.rootDoc().getField("otherField"), notNullValue()); + } + public void testIgnoreMalformedSkipsField() throws Exception { ensureGreen(); XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java index df6183be50a41..879173a5bc26e 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentileRanksAggregatorTests.java @@ -44,8 +44,8 @@ private BinaryDocValuesField getDocValue(String fieldName, double[] values) thro Iterator iterator = centroids.iterator(); while ( iterator.hasNext()) { Centroid centroid = iterator.next(); - streamOutput.writeDouble(centroid.mean()); streamOutput.writeVInt(centroid.count()); + streamOutput.writeDouble(centroid.mean()); } return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); } diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java 
b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java index dddf900df90ab..e1340619256cf 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/TDigestPreAggregatedPercentilesAggregatorTests.java @@ -47,8 +47,8 @@ private BinaryDocValuesField getDocValue(String fieldName, double[] values) thro Iterator iterator = centroids.iterator(); while ( iterator.hasNext()) { Centroid centroid = iterator.next(); - streamOutput.writeDouble(centroid.mean()); streamOutput.writeVInt(centroid.count()); + streamOutput.writeDouble(centroid.mean()); } return new BinaryDocValuesField(fieldName, streamOutput.bytes().toBytesRef()); } From c8a1f1246529736a2d66814458ef15361d8dbb90 Mon Sep 17 00:00:00 2001 From: iverase Date: Thu, 28 Nov 2019 11:31:18 +0100 Subject: [PATCH 23/24] remove support for parsed fields --- .../mapper/HistogramFieldMapper.java | 28 ------ .../mapper/HistogramFieldMapperTests.java | 87 ------------------- .../HistogramPercentileAggregationTests.java | 8 +- 3 files changed, 4 insertions(+), 119 deletions(-) diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index 8a1f27f202ad8..58a7b860d3189 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -55,7 +55,6 @@ import java.util.Map; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; -import static org.elasticsearch.index.mapper.TypeParsers.parseField; /** * Field Mapper for pre-aggregated 
histograms. @@ -105,32 +104,6 @@ protected Explicit ignoreMalformed(BuilderContext context) { return HistogramFieldMapper.Defaults.IGNORE_MALFORMED; } - @Override - public Builder store(boolean store) { - if (store) { - throw new IllegalArgumentException("The [" + CONTENT_TYPE + "] field does not support " + - "stored fields"); - } - return super.store(false); - } - - @Override - public Builder index(boolean index) { - if (index) { - throw new IllegalArgumentException("The [" + CONTENT_TYPE + "] field does not support indexing"); - } - return super.store(false); - } - - @Override - public Builder indexOptions(IndexOptions indexOptions) { - if (indexOptions.equals(IndexOptions.NONE) == false) { - throw new IllegalArgumentException("The [" + CONTENT_TYPE + "] field does not support " + - "index options, got [index_options]=" + indexOptionToString(indexOptions)); - } - return super.indexOptions(indexOptions); - } - public HistogramFieldMapper build(BuilderContext context, String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings, MultiFields multiFields, Explicit ignoreMalformed, CopyTo copyTo) { @@ -152,7 +125,6 @@ public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { Builder builder = new HistogramFieldMapper.Builder(name); - parseField(builder, name, node, parserContext); for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { Map.Entry entry = iterator.next(); String propName = entry.getKey(); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java index 3f298c40b7296..8878d86fb2051 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java +++ 
b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapperTests.java @@ -12,7 +12,6 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.mapper.DocumentMapper; -import org.elasticsearch.index.mapper.DocumentMapperParser; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceToParse; @@ -499,92 +498,6 @@ public void testNegativeCount() throws Exception { assertThat(e.getCause().getMessage(), containsString("[counts] elements must be >= 0 but got -3")); } - public void testSetStoredField() throws Exception { - ensureGreen(); - XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") - .startObject("properties").startObject("pre_aggregated").field("type", "histogram") - .field("store", true); - String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); - - DocumentMapperParser documentMapperParser = createIndex("test").mapperService().documentMapperParser(); - - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> - documentMapperParser.parse("_doc", new CompressedXContent(mapping))); - assertThat(e.getMessage(), containsString("The [histogram] field does not support stored fields")); - - xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") - .startObject("properties").startObject("pre_aggregated").field("type", "histogram") - .field("store", false); - String mapping2 = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); - DocumentMapper defaultMapper = documentMapperParser.parse("_doc", new CompressedXContent(mapping2)); - assertNotNull(defaultMapper); - } - - public void testSetIndexField() throws Exception { - ensureGreen(); - XContentBuilder xContentBuilder = 
XContentFactory.jsonBuilder().startObject().startObject("_doc") - .startObject("properties").startObject("pre_aggregated").field("type", "histogram") - .field("index", true); - final String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); - - DocumentMapperParser documentMapperParser = createIndex("test").mapperService().documentMapperParser(); - - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> - documentMapperParser.parse("_doc", new CompressedXContent(mapping))); - assertThat(e.getMessage(), containsString("The [histogram] field does not support indexing")); - - xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") - .startObject("properties").startObject("pre_aggregated").field("type", "histogram") - .field("index", false); - final String mapping2 = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); - - DocumentMapper defaultMapper = documentMapperParser.parse("_doc", new CompressedXContent(mapping2)); - assertNotNull(defaultMapper); - } - - public void testSetDocValuesField() throws Exception { - ensureGreen(); - XContentBuilder xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") - .startObject("properties").startObject("pre_aggregated").field("type", "histogram") - .field("doc_values", false); - final String mapping = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); - - DocumentMapperParser documentMapperParser = createIndex("test").mapperService().documentMapperParser(); - - DocumentMapper defaultMapper = documentMapperParser.parse("_doc", new CompressedXContent(mapping)); - assertNotNull(defaultMapper); - - ParsedDocument doc = defaultMapper.parse(new SourceToParse("test", "1", - BytesReference.bytes(XContentFactory.jsonBuilder() - .startObject().field("pre_aggregated").startObject() - .field("values", new double[] {2, 3}) - .field("counts", new int[] {4, 6}) - 
.endObject() - .endObject()), - XContentType.JSON)); - - assertThat(doc.rootDoc().getField("pre_aggregated"), nullValue()); - - xContentBuilder = XContentFactory.jsonBuilder().startObject().startObject("_doc") - .startObject("properties").startObject("pre_aggregated").field("type", "histogram") - .field("doc_values", true); - final String mapping2 = Strings.toString(xContentBuilder.endObject().endObject().endObject().endObject()); - - defaultMapper = documentMapperParser.parse("_doc", new CompressedXContent(mapping2)); - assertNotNull(defaultMapper); - - doc = defaultMapper.parse(new SourceToParse("test", "1", - BytesReference.bytes(XContentFactory.jsonBuilder() - .startObject().field("pre_aggregated").startObject() - .field("values", new double[] {2, 3}) - .field("counts", new int[] {4, 6}) - .endObject() - .endObject()), - XContentType.JSON)); - - assertThat(doc.rootDoc().getField("pre_aggregated"), notNullValue()); - } - @Override protected Collection> getPlugins() { List> plugins = new ArrayList<>(super.getPlugins()); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java index cb5ab379e52c6..9561870f55495 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/mapper/HistogramPercentileAggregationTests.java @@ -74,8 +74,8 @@ public void testHDRHistogram() throws Exception { DoubleHistogram histogram = new DoubleHistogram(numberOfSignificantValueDigits); BulkRequest bulkRequest = new BulkRequest(); - int numDocs = 100000; - int frq = 10000; + int numDocs = 10000; + int frq = 1000; for (int i =0; i < numDocs; i ++) { double value = random().nextDouble(); @@ -168,8 +168,8 @@ public void testTDigestHistogram() 
throws Exception { TDigestState histogram = new TDigestState(compression); BulkRequest bulkRequest = new BulkRequest(); - int numDocs = 100000; - int frq = 10000; + int numDocs = 10000; + int frq = 1000; for (int i =0; i < numDocs; i ++) { double value = random().nextDouble(); From f8cf1a764ec8b991899ff0d48d3de2ce48fb0042 Mon Sep 17 00:00:00 2001 From: iverase Date: Thu, 28 Nov 2019 12:47:39 +0100 Subject: [PATCH 24/24] addressed last comments --- docs/reference/mapping/types/histogram.asciidoc | 2 +- .../xpack/analytics/mapper/HistogramFieldMapper.java | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc index b82e07167cb40..fe4209c52b772 100644 --- a/docs/reference/mapping/types/histogram.asciidoc +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -30,7 +30,7 @@ per document. Nested arrays are not supported. `histogram` fields are primarily intended for use with aggregations. To make it more readily accessible for aggregations, `histogram` field data is stored as a binary <> and not indexed. Its size in bytes is at most -`12 * numValues`, where `numValues` is the length of the provided arrays. +`13 * numValues`, where `numValues` is the length of the provided arrays. 
Because the data is not indexed, you only can use `histogram` fields for the following aggregations and queries: diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java index 58a7b860d3189..6ef920bd33fa3 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/mapper/HistogramFieldMapper.java @@ -422,11 +422,10 @@ public void parse(ParseContext context) throws IOException { throw new MapperParsingException("failed to parse field [{}] of type [{}]", ex, fieldType().name(), fieldType().typeName()); } - // we need to advance until the end of the field + if (subParser != null) { - while (token != null) { - token = subParser.nextToken(); - } + // close the subParser so we advance to the end of the object + subParser.close(); } context.addIgnoredField(fieldType().name()); }