From b10bbd01059b0c09da3b62f8c3c3b6ad7949ccef Mon Sep 17 00:00:00 2001
From: Nik Everett
Date: Wed, 12 Feb 2020 17:57:04 -0500
Subject: [PATCH] Fix a DST error in date_histogram (backport of #52016)

When `date_histogram` attempts to optimize itself for a particular
time zone, it checks to see if the entire shard is within the same
"transition". Most time zones transition once every six months or
thereabouts, so the optimization can usually kick in. *But* it crashes
when you attempt to feed it a time zone whose last DST transition was
before epoch. The reason for this is a little twisted: before this
patch it'd find the next and previous transitions in milliseconds since
epoch. Then it'd box them into `Long`s and pass them into the
`DateFieldType` to check if the shard's contents were within the range.
The trouble is they were then converted to `String`s which were *then*
parsed back to `Instant`s which were then converted to `long`s. And the
parser doesn't like most negative numbers. And everything before epoch
is negative.

This change removes the `long` -> `Long` -> `String` -> `Instant` ->
`long` chain in favor of a direct `long` -> `Instant` -> `long`
conversion, which avoids the fairly complex parsing code and handles a
bunch of interesting edge cases around epoch, as well as other edge
cases around `date_nanos`.

Closes #50265
---
 .../test/search.aggregation/10_histogram.yml  | 304 +++++++++++++++++-
 .../elasticsearch/common/time/DateUtils.java  |  22 +-
 .../index/mapper/DateFieldMapper.java         |  45 ++-
 .../index/mapper/MappedFieldType.java         |   8 +-
 .../DateHistogramAggregationBuilder.java      | 177 +++++-----
 .../common/time/DateUtilsTests.java           |  24 +-
 .../index/mapper/DateFieldTypeTests.java      | 120 +++++--
 7 files changed, 581 insertions(+), 119 deletions(-)
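Illustration (not part of the patch): a minimal, JDK-only sketch of the
new conversion path. The class and method names below are hypothetical;
the clamping mirrors the DateUtils.clampToNanosRange introduced in the
diff that follows.

    import java.time.Instant;

    class ClampSketch {
        // Latest instant whose nanoseconds-since-epoch still fit in a long,
        // mirroring DateUtils.MAX_NANOSECOND_INSTANT from the diff below.
        static final Instant MAX_NANOS = Instant.parse("2262-04-11T23:47:16.854775807Z");

        // The new path: long -> Instant -> long, with no String round-trip
        // that could trip over negative (pre-epoch) numbers.
        static long millisToNanosSinceEpoch(long epochMillis) {
            Instant instant = Instant.ofEpochMilli(epochMillis);
            if (instant.isBefore(Instant.EPOCH)) {
                instant = Instant.EPOCH;    // everything pre-epoch clamps to 0
            } else if (instant.isAfter(MAX_NANOS)) {
                instant = MAX_NANOS;        // anything later would overflow a long
            }
            return instant.getEpochSecond() * 1_000_000_000L + instant.getNano();
        }

        public static void main(String[] args) {
            System.out.println(millisToNanosSinceEpoch(-68659200000L)); // pre-epoch, prints 0
            System.out.println(millisToNanosSinceEpoch(1_000L));        // prints 1000000000
        }
    }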
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml
index 3c4dba98ab943..53ff35d348fb1 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml
@@ -6,11 +6,11 @@ setup:
         settings:
           number_of_replicas: 0
         mappings:
-          "properties":
-            "number":
-              "type" : "integer"
-            "date":
-              "type" : "date"
+          properties:
+            number:
+              type: integer
+            date:
+              type: date
   - do:
       cluster.health:
         wait_for_status: green
@@ -196,3 +196,297 @@ setup:
 
   - match: { aggregations.histo.buckets.2.key_as_string: "2016-01-01T00:00:00.000Z" }
   - match: { aggregations.histo.buckets.2.doc_count: 2 }
+
+---
+"date_histogram":
+  - skip:
+      version: " - 7.1.99"
+      reason: calendar_interval introduced in 7.2.0
+
+  - do:
+      indices.create:
+        index: test_2
+        body:
+          settings:
+            # There was a BWC issue that only showed up on empty shards. This
+            # test has 4 docs and 5 shards to make sure we get one empty.
+            number_of_shards: 5
+          mappings:
+            properties:
+              date:
+                type: date
+                fields:
+                  nanos:
+                    type: date_nanos
+
+  - do:
+      bulk:
+        index: test_2
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"date": "2016-01-01"}'
+          - '{"index": {}}'
+          - '{"date": "2016-01-02"}'
+          - '{"index": {}}'
+          - '{"date": "2016-02-01"}'
+          - '{"index": {}}'
+          - '{"date": "2016-03-01"}'
+
+  - do:
+      search:
+        body:
+          size: 0
+          aggs:
+            histo:
+              date_histogram:
+                field: date
+                calendar_interval: month
+  - match: { hits.total.value: 4 }
+  - length: { aggregations.histo.buckets: 3 }
+  - match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.0.doc_count: 2 }
+  - match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.1.doc_count: 1 }
+  - match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.2.doc_count: 1 }
+
+  - do:
+      search:
+        body:
+          size: 0
+          aggs:
+            histo:
+              date_histogram:
+                field: date.nanos
+                calendar_interval: month
+  - match: { hits.total.value: 4 }
+  - length: { aggregations.histo.buckets: 3 }
+  - match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.0.doc_count: 2 }
+  - match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.1.doc_count: 1 }
+  - match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.2.doc_count: 1 }
+
+---
+"date_histogram with offset":
+  - skip:
+      version: "all"
+      reason: "AwaitsFix https://github.com/elastic/elasticsearch/issues/51525"
+      # When fixed, reinstate these lines
+      #version: " - 7.1.99"
+      #reason: calendar_interval introduced in 7.2.0
+
+  - do:
+      indices.create:
+        index: test_2
+        body:
+          settings:
+            # There was a BWC issue that only showed up on empty shards. This
+            # test has 4 docs and 5 shards to make sure we get one empty.
+            number_of_shards: 5
+          mappings:
+            properties:
+              date:
+                type: date
+
+  - do:
+      bulk:
+        index: test_2
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"date": "2016-01-01"}'
+          - '{"index": {}}'
+          - '{"date": "2016-01-02"}'
+          - '{"index": {}}'
+          - '{"date": "2016-02-01"}'
+          - '{"index": {}}'
+          - '{"date": "2016-03-01"}'
+
+  - do:
+      search:
+        body:
+          size: 0
+          aggs:
+            histo:
+              date_histogram:
+                field: date
+                calendar_interval: month
+                offset: +1d
+
+  - match: { hits.total.value: 4 }
+  - length: { aggregations.histo.buckets: 3 }
+  - match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.0.doc_count: 1 }
+  - match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.1.doc_count: 2 }
+  - match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.2.doc_count: 1 }
+
+
+---
+"date_histogram on range":
+  - skip:
+      version: " - 7.4.1"
+      reason: doc values on ranges implemented in 7.4.1
+
+  - do:
+      indices.create:
+        index: test_2
+        body:
+          settings:
+            # There was a BWC issue that only showed up on empty shards. This
+            # test has 4 docs and 5 shards to make sure we get one empty.
+            number_of_shards: 5
+          mappings:
+            properties:
+              range:
+                type: date_range
+
+  - do:
+      bulk:
+        index: test_2
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
+          - '{"index": {}}'
+          - '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
+          - '{"index": {}}'
+          - '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
+          - '{"index": {}}'
+          - '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'
+
+  - do:
+      search:
+        body:
+          size: 0
+          aggs:
+            histo:
+              date_histogram:
+                field: range
+                calendar_interval: month
+
+  - match: { hits.total.value: 4 }
+  - length: { aggregations.histo.buckets: 3 }
+  - match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.0.doc_count: 2 }
+  - match: { aggregations.histo.buckets.1.key_as_string: "2016-02-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.1.doc_count: 1 }
+  - match: { aggregations.histo.buckets.2.key_as_string: "2016-03-01T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.2.doc_count: 1 }
+
+---
+"date_histogram on range with offset":
+  - skip:
+      version: " - 7.4.1"
+      reason: doc values on ranges implemented in 7.4.1
+
+  - do:
+      indices.create:
+        index: test_2
+        body:
+          settings:
+            # There was a BWC issue that only showed up on empty shards. This
+            # test has 4 docs and 5 shards to make sure we get one empty.
+            number_of_shards: 5
+          mappings:
+            properties:
+              range:
+                type: date_range
+
+  - do:
+      bulk:
+        index: test_2
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"range": {"gte": "2016-01-01", "lt": "2016-01-02"}}'
+          - '{"index": {}}'
+          - '{"range": {"gte": "2016-01-02", "lt": "2016-01-03"}}'
+          - '{"index": {}}'
+          - '{"range": {"gte": "2016-02-01", "lt": "2016-02-02"}}'
+          - '{"index": {}}'
+          - '{"range": {"gte": "2016-03-01", "lt": "2016-03-02"}}'
+
+  - do:
+      search:
+        body:
+          size: 0
+          aggs:
+            histo:
+              date_histogram:
+                field: range
+                calendar_interval: month
+                offset: +1d
+
+  - match: { hits.total.value: 4 }
+  - length: { aggregations.histo.buckets: 3 }
+  - match: { aggregations.histo.buckets.0.key_as_string: "2015-12-02T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.0.doc_count: 1 }
+  - match: { aggregations.histo.buckets.1.key_as_string: "2016-01-02T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.1.doc_count: 2 }
+  - match: { aggregations.histo.buckets.2.key_as_string: "2016-02-02T00:00:00.000Z" }
+  - match: { aggregations.histo.buckets.2.doc_count: 1 }
+
+---
+"date_histogram with pre-epoch daylight savings time transition":
+  - skip:
+      version: " - 7.6.1"
+      reason: bug fixed in 7.6.1
+  # Add date_nanos to the mapping. We couldn't do it during setup because that
+  # is run against 6.8, which doesn't have date_nanos.
+  - do:
+      indices.put_mapping:
+        index: test_1
+        body:
+          properties:
+            number:
+              type: integer
+            date:
+              type: date
+              fields:
+                nanos:
+                  type: date_nanos
+
+  - do:
+      bulk:
+        index: test_1
+        refresh: true
+        body:
+          - '{"index": {}}'
+          - '{"date": "2016-01-01"}'
+
+  - do:
+      search:
+        body:
+          size: 0
+          aggs:
+            histo:
+              date_histogram:
+                field: date
+                fixed_interval: 1ms
+                time_zone: America/Phoenix
+
+  - match: { hits.total.value: 1 }
+  - length: { aggregations.histo.buckets: 1 }
+  - match: { aggregations.histo.buckets.0.key_as_string: "2015-12-31T17:00:00.000-07:00" }
+  - match: { aggregations.histo.buckets.0.doc_count: 1 }
+
+  - do:
+      search:
+        body:
+          size: 0
+          aggs:
+            histo:
+              date_histogram:
+                field: date.nanos
+                fixed_interval: 1ms
+                time_zone: America/Phoenix
+
+  - match: { hits.total.value: 1 }
+  - length: { aggregations.histo.buckets: 1 }
+  - match: { aggregations.histo.buckets.0.key_as_string: "2015-12-31T17:00:00.000-07:00" }
+  - match: { aggregations.histo.buckets.0.doc_count: 1 }
diff --git a/server/src/main/java/org/elasticsearch/common/time/DateUtils.java b/server/src/main/java/org/elasticsearch/common/time/DateUtils.java
index e6bf6a65105b7..7b3a06bc665b9 100644
--- a/server/src/main/java/org/elasticsearch/common/time/DateUtils.java
+++ b/server/src/main/java/org/elasticsearch/common/time/DateUtils.java
@@ -208,7 +208,7 @@ public static ZoneId of(String zoneId) {
         return ZoneId.of(zoneId).normalized();
     }
 
-    private static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");
+    static final Instant MAX_NANOSECOND_INSTANT = Instant.parse("2262-04-11T23:47:16.854775807Z");
 
     static final long MAX_NANOSECOND_IN_MILLIS = MAX_NANOSECOND_INSTANT.toEpochMilli();
 
@@ -231,6 +231,26 @@ public static long toLong(Instant instant) {
         return instant.getEpochSecond() * 1_000_000_000 + instant.getNano();
     }
 
+    /**
+     * Returns an instant that is within the valid nanosecond range. If
+     * the parameter is before the valid nanosecond range then this returns
+     * the minimum {@linkplain Instant} valid for nanosecond resolution. If
+     * the parameter is after the valid nanosecond range then this returns
+     * the maximum {@linkplain Instant} valid for nanosecond resolution.
+     * <p>
+     * Useful for checking if all values for the field are within some range,
+     * even if the range's endpoints are not valid nanosecond resolution.
+     */
+    public static Instant clampToNanosRange(Instant instant) {
+        if (instant.isBefore(Instant.EPOCH)) {
+            return Instant.EPOCH;
+        }
+        if (instant.isAfter(MAX_NANOSECOND_INSTANT)) {
+            return MAX_NANOSECOND_INSTANT;
+        }
+        return instant;
+    }
+
     /**
      * convert a long value to a java time instant
      * the long value resembles the nanoseconds since the epoch
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java
index 43762645190e0..9547233efbbfb 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java
@@ -87,6 +87,11 @@ public long convert(Instant instant) {
         public Instant toInstant(long value) {
             return Instant.ofEpochMilli(value);
         }
+
+        @Override
+        public Instant clampToValidRange(Instant instant) {
+            return instant;
+        }
     },
     NANOSECONDS("date_nanos", NumericType.DATE_NANOSECONDS) {
         @Override
@@ -98,6 +103,11 @@ public long convert(Instant instant) {
         public Instant toInstant(long value) {
             return DateUtils.toInstant(value);
         }
+
+        @Override
+        public Instant clampToValidRange(Instant instant) {
+            return DateUtils.clampToNanosRange(instant);
+        }
     };
 
     private final String type;
@@ -116,10 +126,18 @@ NumericType numericType() {
         return numericType;
     }
 
+    /**
+     * Convert an {@linkplain Instant} into a long value in this resolution.
+     */
     public abstract long convert(Instant instant);
 
+    /**
+     * Convert a long value in this resolution into an instant.
+     */
     public abstract Instant toInstant(long value);
 
+    /**
+     * Clamp the given {@linkplain Instant} into the range that this resolution can represent.
+     */
+    public abstract Instant clampToValidRange(Instant instant);
+
     public static Resolution ofOrdinal(int ord) {
         for (Resolution resolution : values()) {
             if (ord == resolution.ordinal()) {
@@ -439,9 +457,30 @@ public Relation isFieldWithinQuery(IndexReader reader,
             }
         }
 
-        // This check needs to be done after fromInclusive and toInclusive
-        // are resolved so we can throw an exception if they are invalid
-        // even if there are no points in the shard
+        return isFieldWithinRange(reader, fromInclusive, toInclusive);
+    }
+
+    /**
+     * Return whether all values of the given {@link IndexReader} are within the range,
+     * outside the range or cross the range. Unlike {@link #isFieldWithinQuery} this
+     * accepts values that are out of the range of the {@link #resolution} of this field.
+     * @param fromInclusive start date, inclusive
+     * @param toInclusive end date, inclusive
+     */
+    public Relation isFieldWithinRange(IndexReader reader, Instant fromInclusive, Instant toInclusive)
+        throws IOException {
+        return isFieldWithinRange(reader,
+            resolution.convert(resolution.clampToValidRange(fromInclusive)),
+            resolution.convert(resolution.clampToValidRange(toInclusive)));
+    }
+
+    /**
+     * Return whether all values of the given {@link IndexReader} are within the range,
+     * outside the range or cross the range.
+     * @param fromInclusive start date, inclusive, {@link Resolution#convert(Instant) converted} to the appropriate scale
+     * @param toInclusive end date, inclusive, {@link Resolution#convert(Instant) converted} to the appropriate scale
+     */
+    private Relation isFieldWithinRange(IndexReader reader, long fromInclusive, long toInclusive) throws IOException {
         if (PointValues.size(reader, name()) == 0) {
             // no points, so nothing matches
             return Relation.DISJOINT;
         }
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
index 82ea883008d1d..746a6b803fad7 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java
@@ -420,10 +420,10 @@ public enum Relation {
      * {@link Relation#INTERSECTS}, which is always fine to return when there is
      * no way to check whether values are actually within bounds. */
     public Relation isFieldWithinQuery(
-            IndexReader reader,
-            Object from, Object to,
-            boolean includeLower, boolean includeUpper,
-            ZoneId timeZone, DateMathParser dateMathParser, QueryRewriteContext context) throws IOException {
+        IndexReader reader,
+        Object from, Object to,
+        boolean includeLower, boolean includeUpper,
+        ZoneId timeZone, DateMathParser dateMathParser, QueryRewriteContext context) throws IOException {
         return Relation.INTERSECTS;
     }
 
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java
index 3e9c219d0235d..483dc126554d3 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregationBuilder.java
@@ -34,6 +34,7 @@
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
 import org.elasticsearch.index.fielddata.IndexNumericFieldData;
+import org.elasticsearch.index.mapper.DateFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MappedFieldType.Relation;
 import org.elasticsearch.index.query.QueryShardContext;
@@ -410,86 +411,118 @@ public String getType() {
         return NAME;
     }
 
-    /*
+    /**
+     * Returns a {@linkplain ZoneId} that functions the same as
+     * {@link #timeZone()} on the data in the shard referred to by
+     * {@code context}. It attempts to convert zones that
+     * have non-fixed offsets into fixed offset zones that produce the
+     * same results on all data in the shard.
+     * <p>
+     * We go about this in several steps:
+     * <ol>
+     * <li>A bunch of preflight checks to see if we *can* optimize it</li>
+     * <li>Find any Instant in the shard</li>
+     * <li>Find the DST transition before and after that Instant</li>
+     * <li>Round those into the interval</li>
+     * <li>Check if the rounded values include all values within the shard</li>
+     * <li>If they do then return a fixed offset time zone because it
+     *     will return the same values for all data in the shard as the
+     *     original time zone, but faster</li>
+     * <li>Otherwise return the original time zone. It'll be slower, but
+     *     correct.</li>
+     * </ol>
+     * <p>
      * NOTE: this can't be done in rewrite() because the timezone is then also used on the
      * coordinating node in order to generate missing buckets, which may cross a transition
      * even though data on the shards doesn't.
      */
     ZoneId rewriteTimeZone(QueryShardContext context) throws IOException {
         final ZoneId tz = timeZone();
-        if (field() != null &&
-                tz != null &&
-                tz.getRules().isFixedOffset() == false &&
-                field() != null &&
-                script() == null) {
-            final MappedFieldType ft = context.fieldMapper(field());
-            final IndexReader reader = context.getIndexReader();
-            if (ft != null && reader != null) {
-                Long anyInstant = null;
-                final IndexNumericFieldData fieldData = context.getForField(ft);
-                for (LeafReaderContext ctx : reader.leaves()) {
-                    AtomicNumericFieldData leafFD = fieldData.load(ctx);
-                    SortedNumericDocValues values = leafFD.getLongValues();
-                    if (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
-                        anyInstant = values.nextValue();
-                        break;
-                    }
-                }
-
-                if (anyInstant != null) {
-                    Instant instant = Instant.ofEpochMilli(anyInstant);
-                    ZoneOffsetTransition prevOffsetTransition = tz.getRules().previousTransition(instant);
-                    final long prevTransition;
-                    if (prevOffsetTransition != null) {
-                        prevTransition = prevOffsetTransition.getInstant().toEpochMilli();
-                    } else {
-                        prevTransition = instant.toEpochMilli();
-                    }
-                    ZoneOffsetTransition nextOffsetTransition = tz.getRules().nextTransition(instant);
-                    final long nextTransition;
-                    if (nextOffsetTransition != null) {
-                        nextTransition = nextOffsetTransition.getInstant().toEpochMilli();
-                    } else {
-                        nextTransition = instant.toEpochMilli();
-                    }
-
-                    // We need all not only values but also rounded values to be within
-                    // [prevTransition, nextTransition].
-                    final long low;
-
-
-                    DateIntervalWrapper.IntervalTypeEnum intervalType = dateHistogramInterval.getIntervalType();
-                    if (intervalType.equals(DateIntervalWrapper.IntervalTypeEnum.FIXED)) {
-                        low = Math.addExact(prevTransition, dateHistogramInterval.tryIntervalAsFixedUnit().millis());
-                    } else if (intervalType.equals(DateIntervalWrapper.IntervalTypeEnum.CALENDAR)) {
-                        final Rounding.DateTimeUnit intervalAsUnit = dateHistogramInterval.tryIntervalAsCalendarUnit();
-                        final Rounding rounding = Rounding.builder(intervalAsUnit).timeZone(timeZone()).build();
-                        low = rounding.nextRoundingValue(prevTransition);
-                    } else {
-                        // We're not sure what the interval was originally (legacy) so use old behavior of assuming
-                        // calendar first, then fixed. Required because fixed/cal overlap in places ("1h")
-                        Rounding.DateTimeUnit intervalAsUnit = dateHistogramInterval.tryIntervalAsCalendarUnit();
-                        if (intervalAsUnit != null) {
-                            final Rounding rounding = Rounding.builder(intervalAsUnit).timeZone(timeZone()).build();
-                            low = rounding.nextRoundingValue(prevTransition);
-                        } else {
-                            final TimeValue intervalAsMillis = dateHistogramInterval.tryIntervalAsFixedUnit();
-                            low = Math.addExact(prevTransition, intervalAsMillis.millis());
-                        }
-                    }
-                    // rounding rounds down, so 'nextTransition' is a good upper bound
-                    final long high = nextTransition;
-
-                    if (ft.isFieldWithinQuery(reader, low, high, true, false, ZoneOffset.UTC, EPOCH_MILLIS_PARSER,
-                            context) == Relation.WITHIN) {
-                        // All values in this reader have the same offset despite daylight saving times.
-                        // This is very common for location-based timezones such as Europe/Paris in
-                        // combination with time-based indices.
-                        return ZoneOffset.ofTotalSeconds(tz.getRules().getOffset(instant).getTotalSeconds());
-                    }
-                }
-            }
-        }
+        if (tz == null || tz.getRules().isFixedOffset()) {
+            // This time zone is already as fast as it is going to get.
+            return tz;
+        }
+        if (script() != null) {
+            // We can't be sure what dates the script will return so we don't attempt to optimize anything
+            return tz;
+        }
+        if (field() == null) {
+            // Without a field we're not going to be able to look anything up.
+            return tz;
+        }
+        MappedFieldType ft = context.fieldMapper(field());
+        if (ft == null || false == ft instanceof DateFieldMapper.DateFieldType) {
+            // If the field is unmapped or not a date then we can't get its range.
+            return tz;
+        }
+        DateFieldMapper.DateFieldType dft = (DateFieldMapper.DateFieldType) ft;
+        final IndexReader reader = context.getIndexReader();
+        if (reader == null) {
+            return tz;
+        }
+
+        Instant instant = null;
+        final IndexNumericFieldData fieldData = context.getForField(ft);
+        for (LeafReaderContext ctx : reader.leaves()) {
+            AtomicNumericFieldData leafFD = fieldData.load(ctx);
+            SortedNumericDocValues values = leafFD.getLongValues();
+            if (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+                instant = Instant.ofEpochMilli(values.nextValue());
+                break;
+            }
+        }
+        if (instant == null) {
+            return tz;
+        }
+
+        ZoneOffsetTransition prevOffsetTransition = tz.getRules().previousTransition(instant);
+        final long prevTransition;
+        if (prevOffsetTransition != null) {
+            prevTransition = prevOffsetTransition.getInstant().toEpochMilli();
+        } else {
+            prevTransition = instant.toEpochMilli();
+        }
+        ZoneOffsetTransition nextOffsetTransition = tz.getRules().nextTransition(instant);
+        final long nextTransition;
+        if (nextOffsetTransition != null) {
+            nextTransition = nextOffsetTransition.getInstant().toEpochMilli();
+        } else {
+            nextTransition = instant.toEpochMilli();
+        }
+
+        // Not only the values but also the rounded values need to be within
+        // [prevTransition, nextTransition].
+        final long low;
+
+        DateIntervalWrapper.IntervalTypeEnum intervalType = dateHistogramInterval.getIntervalType();
+        if (intervalType.equals(DateIntervalWrapper.IntervalTypeEnum.FIXED)) {
+            low = Math.addExact(prevTransition, dateHistogramInterval.tryIntervalAsFixedUnit().millis());
+        } else if (intervalType.equals(DateIntervalWrapper.IntervalTypeEnum.CALENDAR)) {
+            final Rounding.DateTimeUnit intervalAsUnit = dateHistogramInterval.tryIntervalAsCalendarUnit();
+            final Rounding rounding = Rounding.builder(intervalAsUnit).timeZone(timeZone()).build();
+            low = rounding.nextRoundingValue(prevTransition);
+        } else {
+            // We're not sure what the interval was originally (legacy) so use old behavior of assuming
+            // calendar first, then fixed. Required because fixed/cal overlap in places ("1h")
+            Rounding.DateTimeUnit intervalAsUnit = dateHistogramInterval.tryIntervalAsCalendarUnit();
+            if (intervalAsUnit != null) {
+                final Rounding rounding = Rounding.builder(intervalAsUnit).timeZone(timeZone()).build();
+                low = rounding.nextRoundingValue(prevTransition);
+            } else {
+                final TimeValue intervalAsMillis = dateHistogramInterval.tryIntervalAsFixedUnit();
+                low = Math.addExact(prevTransition, intervalAsMillis.millis());
+            }
+        }
+        // rounding rounds down, so 'nextTransition' is a good upper bound
+        final long high = nextTransition;
+
+        if (dft.isFieldWithinRange(
+                reader, Instant.ofEpochMilli(low), Instant.ofEpochMilli(high - 1)) == Relation.WITHIN) {
+            // All values in this reader have the same offset despite daylight saving times.
+            // This is very common for location-based timezones such as Europe/Paris in
+            // combination with time-based indices.
+            return ZoneOffset.ofTotalSeconds(tz.getRules().getOffset(instant).getTotalSeconds());
+        }
         return tz;
     }
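Aside (not part of the patch): the pre-epoch case is easy to reproduce
with plain java.time. This hypothetical snippet shows why America/Phoenix,
used by the new yml test above, trips the old code path: its previous
transition has negative epoch millis and it has no next transition at all.

    import java.time.Instant;
    import java.time.ZoneId;
    import java.time.zone.ZoneOffsetTransition;

    class PhoenixSketch {
        public static void main(String[] args) {
            ZoneId tz = ZoneId.of("America/Phoenix");
            Instant now = Instant.now();
            // Phoenix abandoned DST in 1967, well before epoch.
            ZoneOffsetTransition prev = tz.getRules().previousTransition(now);
            System.out.println(prev.getInstant().toEpochMilli() < 0); // true
            // And no DST transition is scheduled in the future.
            System.out.println(tz.getRules().nextTransition(now));    // null
        }
    }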
diff --git a/server/src/test/java/org/elasticsearch/common/time/DateUtilsTests.java b/server/src/test/java/org/elasticsearch/common/time/DateUtilsTests.java
index 4ef095da049ec..28d52e2bd8076 100644
--- a/server/src/test/java/org/elasticsearch/common/time/DateUtilsTests.java
+++ b/server/src/test/java/org/elasticsearch/common/time/DateUtilsTests.java
@@ -35,6 +35,7 @@
 import java.util.HashSet;
 import java.util.Set;
 
+import static org.elasticsearch.common.time.DateUtils.clampToNanosRange;
 import static org.elasticsearch.common.time.DateUtils.toInstant;
 import static org.elasticsearch.common.time.DateUtils.toLong;
 import static org.elasticsearch.common.time.DateUtils.toMilliSeconds;
@@ -84,8 +85,8 @@ public void testInstantToLongMin() {
     }
 
     public void testInstantToLongMax() {
-        Instant tooEarlyInstant = ZonedDateTime.parse("2262-04-11T23:47:16.854775808Z").toInstant();
-        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> toLong(tooEarlyInstant));
+        Instant tooLateInstant = ZonedDateTime.parse("2262-04-11T23:47:16.854775808Z").toInstant();
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> toLong(tooLateInstant));
         assertThat(e.getMessage(), containsString("is after"));
     }
 
@@ -109,6 +110,25 @@ public void testLongToInstant() {
             is(ZonedDateTime.parse("2262-04-11T23:47:16.854775807Z").toInstant()));
     }
 
+    public void testClampToNanosRange() {
+        assertThat(clampToNanosRange(Instant.EPOCH), equalTo(Instant.EPOCH));
+
+        Instant instant = createRandomInstant();
+        assertThat(clampToNanosRange(instant), equalTo(instant));
+    }
+
+    public void testClampToNanosRangeMin() {
+        assertThat(clampToNanosRange(Instant.EPOCH.minusMillis(1)), equalTo(Instant.EPOCH));
+
+        Instant tooEarlyInstant = ZonedDateTime.parse("1677-09-21T00:12:43.145224191Z").toInstant();
+        assertThat(clampToNanosRange(tooEarlyInstant), equalTo(Instant.EPOCH));
+    }
+
+    public void testClampToNanosRangeMax() {
+        Instant tooLateInstant = ZonedDateTime.parse("2262-04-11T23:47:16.854775808Z").toInstant();
+        assertThat(clampToNanosRange(tooLateInstant), equalTo(DateUtils.MAX_NANOSECOND_INSTANT));
+    }
+
     public void testNanosToMillis() {
         assertThat(toMilliSeconds(0), is(Instant.EPOCH.toEpochMilli()));
 
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java
index 6ac59169ad908..d18d0fb097746 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java
@@ -45,6 +45,7 @@
 import org.elasticsearch.index.fielddata.IndexNumericFieldData;
 import org.elasticsearch.index.fielddata.plain.SortedNumericDVIndexFieldData;
 import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType;
+import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
 import org.elasticsearch.index.mapper.MappedFieldType.Relation;
 import org.elasticsearch.index.mapper.ParseContext.Document;
 import org.elasticsearch.index.query.QueryRewriteContext;
@@ -53,6 +54,7 @@
 import org.junit.Before;
 
 import java.io.IOException;
+import java.time.Instant;
 import java.time.ZoneOffset;
 import java.util.Locale;
 
@@ -82,13 +84,94 @@ public void modify(MappedFieldType ft) {
         nowInMillis = randomNonNegativeLong();
     }
 
-    public void testIsFieldWithinQueryEmptyReader() throws IOException {
+    public void testIsFieldWithinRangeEmptyReader() throws IOException {
         QueryRewriteContext context = new QueryRewriteContext(xContentRegistry(), writableRegistry(), null, () -> nowInMillis);
         IndexReader reader = new MultiReader();
         DateFieldType ft = new DateFieldType();
         ft.setName("my_date");
         assertEquals(Relation.DISJOINT, ft.isFieldWithinQuery(reader, "2015-10-12", "2016-04-03",
                 randomBoolean(), randomBoolean(), null, null, context));
+        assertEquals(Relation.DISJOINT, ft.isFieldWithinRange(reader, instant("2015-10-12"), instant("2016-04-03")));
+    }
+
+    public void testIsFieldWithinQueryDateMillis() throws IOException {
+        DateFieldType ft = new DateFieldType();
+        ft.setResolution(Resolution.MILLISECONDS);
+        isFieldWithinRangeTestCase(ft);
+    }
+
+    public void testIsFieldWithinQueryDateNanos() throws IOException {
+        DateFieldType ft = new DateFieldType();
+        ft.setResolution(Resolution.NANOSECONDS);
+        isFieldWithinRangeTestCase(ft);
+    }
+
+    public void isFieldWithinRangeTestCase(DateFieldType ft) throws IOException {
+        ft.setName("my_date");
+
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
+        Document doc = new Document();
+        LongPoint field = new LongPoint("my_date", ft.parse("2015-10-12"));
+        doc.add(field);
+        w.addDocument(doc);
+        field.setLongValue(ft.parse("2016-04-03"));
+        w.addDocument(doc);
+        DirectoryReader reader = DirectoryReader.open(w);
+
+        DateMathParser alternateFormat = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.toDateMathParser();
+        doTestIsFieldWithinQuery(ft, reader, null, null);
+        doTestIsFieldWithinQuery(ft, reader, null, alternateFormat);
+        doTestIsFieldWithinQuery(ft, reader, DateTimeZone.UTC, null);
+        doTestIsFieldWithinQuery(ft, reader, DateTimeZone.UTC, alternateFormat);
+
+        QueryRewriteContext context = new QueryRewriteContext(xContentRegistry(), writableRegistry(), null, () -> nowInMillis);
+        assertEquals(Relation.INTERSECTS, ft.isFieldWithinRange(reader, instant("2015-10-09"), instant("2016-01-02")));
+        assertEquals(Relation.INTERSECTS, ft.isFieldWithinRange(reader, instant("2016-01-02"), instant("2016-06-20")));
+        assertEquals(Relation.INTERSECTS, ft.isFieldWithinRange(reader, instant("2016-01-02"), instant("2016-02-12")));
+        assertEquals(Relation.DISJOINT, ft.isFieldWithinRange(reader, instant("2014-01-02"), instant("2015-02-12")));
+        assertEquals(Relation.DISJOINT, ft.isFieldWithinRange(reader, instant("2016-05-11"), instant("2016-08-30")));
+        assertEquals(Relation.WITHIN, ft.isFieldWithinRange(reader, instant("2015-09-25"), instant("2016-05-29")));
+        assertEquals(Relation.WITHIN, ft.isFieldWithinRange(reader, instant("2015-10-12"), instant("2016-04-03")));
+        assertEquals(Relation.INTERSECTS,
+            ft.isFieldWithinRange(reader, instant("2015-10-12").plusMillis(1), instant("2016-04-03").minusMillis(1)));
+        assertEquals(Relation.INTERSECTS,
+            ft.isFieldWithinRange(reader, instant("2015-10-12").plusMillis(1), instant("2016-04-03")));
+        assertEquals(Relation.INTERSECTS,
+            ft.isFieldWithinRange(reader, instant("2015-10-12"), instant("2016-04-03").minusMillis(1)));
+        assertEquals(Relation.INTERSECTS,
+            ft.isFieldWithinRange(reader, instant("2015-10-12").plusNanos(1), instant("2016-04-03").minusNanos(1)));
+        assertEquals(ft.resolution() == Resolution.NANOSECONDS ? Relation.INTERSECTS : Relation.WITHIN, // Millis round down here.
+            ft.isFieldWithinRange(reader, instant("2015-10-12").plusNanos(1), instant("2016-04-03")));
+        assertEquals(Relation.INTERSECTS,
+            ft.isFieldWithinRange(reader, instant("2015-10-12"), instant("2016-04-03").minusNanos(1)));
+
+        // Some edge cases
+        assertEquals(Relation.WITHIN, ft.isFieldWithinRange(reader, Instant.EPOCH, instant("2016-04-03")));
+        assertEquals(Relation.WITHIN, ft.isFieldWithinRange(reader, Instant.ofEpochMilli(-1000), instant("2016-04-03")));
+        assertEquals(Relation.WITHIN, ft.isFieldWithinRange(reader, Instant.ofEpochMilli(Long.MIN_VALUE), instant("2016-04-03")));
+        assertEquals(Relation.WITHIN, ft.isFieldWithinRange(reader, instant("2015-10-12"), Instant.ofEpochMilli(Long.MAX_VALUE)));
+
+        // Fields with no value indexed.
+        DateFieldType ft2 = new DateFieldType();
+        ft2.setName("my_date2");
+
+        assertEquals(Relation.DISJOINT, ft2.isFieldWithinQuery(reader, "2015-10-09", "2016-01-02", false, false, null, null, context));
+        assertEquals(Relation.DISJOINT, ft2.isFieldWithinRange(reader, instant("2015-10-09"), instant("2016-01-02")));
+
+        // Fire a bunch of random values into isFieldWithinRange to make sure it doesn't crash
+        for (int iter = 0; iter < 1000; iter++) {
+            long min = randomLong();
+            long max = randomLong();
+            if (min > max) {
+                long swap = max;
+                max = min;
+                min = swap;
+            }
+            ft.isFieldWithinRange(reader, Instant.ofEpochMilli(min), Instant.ofEpochMilli(max));
+        }
+
+        IOUtils.close(reader, w, dir);
     }
 
     private void doTestIsFieldWithinQuery(DateFieldType ft, DirectoryReader reader,
@@ -116,37 +199,6 @@ private void doTestIsFieldWithinQuery(DateFieldType ft, DirectoryReader reader,
                 true, false, null, null, context));
     }
 
-    public void testIsFieldWithinQuery() throws IOException {
-        Directory dir = newDirectory();
-        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
-        long instant1 =
-            DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse("2015-10-12")).toInstant().toEpochMilli();
-        long instant2 =
-            DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse("2016-04-03")).toInstant().toEpochMilli();
-        Document doc = new Document();
-        LongPoint field = new LongPoint("my_date", instant1);
-        doc.add(field);
-        w.addDocument(doc);
-        field.setLongValue(instant2);
-        w.addDocument(doc);
-        DirectoryReader reader = DirectoryReader.open(w);
-        DateFieldType ft = new DateFieldType();
-        ft.setName("my_date");
-        DateMathParser alternateFormat = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.toDateMathParser();
-        doTestIsFieldWithinQuery(ft, reader, null, null);
-        doTestIsFieldWithinQuery(ft, reader, null, alternateFormat);
-        doTestIsFieldWithinQuery(ft, reader, DateTimeZone.UTC, null);
-        doTestIsFieldWithinQuery(ft, reader, DateTimeZone.UTC, alternateFormat);
-
-        // Fields with no value indexed.
- DateFieldType ft2 = new DateFieldType(); - ft2.setName("my_date2"); - - QueryRewriteContext context = new QueryRewriteContext(xContentRegistry(), writableRegistry(), null, () -> nowInMillis); - assertEquals(Relation.DISJOINT, ft2.isFieldWithinQuery(reader, "2015-10-09", "2016-01-02", false, false, null, null, context)); - IOUtils.close(reader, w, dir); - } - public void testValueFormat() { MappedFieldType ft = createDefaultFieldType(); long instant = DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse("2015-10-12T14:10:55")) @@ -251,4 +303,8 @@ public void testDateNanoDocValues() throws IOException { w.close(); dir.close(); } + + private Instant instant(String str) { + return DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(str)).toInstant(); + } }