From 8a7ddff5562c09931da11d1f85bca2a8beb30fa9 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Fri, 6 Mar 2020 16:33:30 +0100 Subject: [PATCH 1/3] add a normalizeValue method to normalize values for scripted fields --- .../org/elasticsearch/index/fielddata/FieldData.java | 10 ++++++++++ .../fielddata/SingletonSortedBinaryDocValues.java | 5 +++++ .../index/fielddata/SortedBinaryDocValues.java | 6 ++++++ .../fielddata/plain/BytesBinaryDVAtomicFieldData.java | 5 +++++ .../org/elasticsearch/index/mapper/IdFieldMapper.java | 9 +++++++-- .../bucket/composite/BinaryValuesSource.java | 6 ++++-- .../search/aggregations/support/MissingValues.java | 5 +++++ .../search/aggregations/support/ValuesSource.java | 7 +++++++ .../aggregations/support/values/ScriptBytesValues.java | 8 ++++++++ 9 files changed, 57 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/FieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/FieldData.java index 68b8f2c85325f..40ed67e7f2b6c 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/FieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/FieldData.java @@ -367,6 +367,11 @@ public BytesRef nextValue() throws IOException { return values.lookupOrd(values.nextOrd()); } + @Override + public BytesRef normalizeValue(BytesRef value) { + return value; + } + }; } @@ -412,6 +417,11 @@ public boolean advanceExact(int docID) throws IOException { return true; } + @Override + public BytesRef normalizeValue(BytesRef value) { + return value; + } + }; } diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/SingletonSortedBinaryDocValues.java b/server/src/main/java/org/elasticsearch/index/fielddata/SingletonSortedBinaryDocValues.java index 01f110a7b381e..e02b5c70832f1 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/SingletonSortedBinaryDocValues.java +++ 
b/server/src/main/java/org/elasticsearch/index/fielddata/SingletonSortedBinaryDocValues.java @@ -51,4 +51,9 @@ public BinaryDocValues getBinaryDocValues() { return in; } + @Override + public BytesRef normalizeValue(BytesRef value) { + return value; + } + } diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/SortedBinaryDocValues.java b/server/src/main/java/org/elasticsearch/index/fielddata/SortedBinaryDocValues.java index 119fda1bda8f6..047c23adfb4ef 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/SortedBinaryDocValues.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/SortedBinaryDocValues.java @@ -52,4 +52,10 @@ public abstract class SortedBinaryDocValues { */ public abstract BytesRef nextValue() throws IOException; + /** + * Applies normalization to the given value if needed, for example by running a value script. + * @return the normalized value as a {@link BytesRef}; this may be the given value itself if no + * normalization is required. + */ + public abstract BytesRef normalizeValue(BytesRef value); } diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/plain/BytesBinaryDVAtomicFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/plain/BytesBinaryDVAtomicFieldData.java index af4c7fd0da6b2..5cee453629a1c 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/plain/BytesBinaryDVAtomicFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/plain/BytesBinaryDVAtomicFieldData.java @@ -85,6 +85,11 @@ public BytesRef nextValue() throws IOException { return scratch; } + @Override + public BytesRef normalizeValue(BytesRef value) { + return value; + } + }; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java index 16db2778e5754..bc4b9e36e871e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java +++ 
b/server/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java @@ -48,10 +48,10 @@ import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.MultiValueMode; -import org.elasticsearch.search.sort.BucketedSort; -import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.aggregations.support.ValuesSourceType; +import org.elasticsearch.search.sort.BucketedSort; +import org.elasticsearch.search.sort.SortOrder; import java.io.IOException; import java.util.Arrays; @@ -275,6 +275,11 @@ public int docValueCount() { public boolean advanceExact(int doc) throws IOException { return inValues.advanceExact(doc); } + + @Override + public BytesRef normalizeValue(BytesRef value) { + return value; + } }; } }; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/BinaryValuesSource.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/BinaryValuesSource.java index 21346844aac89..b8274541b9646 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/BinaryValuesSource.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/BinaryValuesSource.java @@ -173,11 +173,13 @@ public void collect(int doc, long bucket) throws IOException { } @Override - LeafBucketCollector getLeafCollector(Comparable value, LeafReaderContext context, LeafBucketCollector next) { + LeafBucketCollector getLeafCollector(Comparable value, LeafReaderContext context, LeafBucketCollector next) throws IOException { if (value.getClass() != BytesRef.class) { throw new IllegalArgumentException("Expected BytesRef, got " + value.getClass()); } - currentValue = (BytesRef) value; + final SortedBinaryDocValues dvs = docValuesFunc.apply(context); + currentValue = dvs.normalizeValue((BytesRef) value); + return new 
LeafBucketCollector() { @Override public void collect(int doc, long bucket) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/MissingValues.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/MissingValues.java index c61091fd2a12c..6b092f7b1fe19 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/MissingValues.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/MissingValues.java @@ -91,6 +91,11 @@ public BytesRef nextValue() throws IOException { public String toString() { return "anon SortedBinaryDocValues of [" + super.toString() + "]"; } + + @Override + public BytesRef normalizeValue(BytesRef value) { + return value; + } }; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java index 3f77744b5a56e..51d48248a018e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java @@ -282,6 +282,13 @@ public void setScorer(Scorable scorer) { script.setScorer(scorer); } + @Override + public BytesRef normalizeValue(BytesRef value) { + script.setNextAggregationValue(value.utf8ToString()); + Object run = script.execute(); + return new BytesRef(run.toString()); + } + @Override public boolean advanceExact(int doc) throws IOException { if (bytesValues.advanceExact(doc)) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/values/ScriptBytesValues.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/values/ScriptBytesValues.java index e58cf917a7c5d..46cbc69c89b28 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/values/ScriptBytesValues.java +++ 
b/server/src/main/java/org/elasticsearch/search/aggregations/support/values/ScriptBytesValues.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.aggregations.support.values; import org.apache.lucene.search.Scorable; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.ScorerAware; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; @@ -88,4 +89,11 @@ public boolean advanceExact(int doc) throws IOException { public void setScorer(Scorable scorer) { script.setScorer(scorer); } + + @Override + public BytesRef normalizeValue(BytesRef value) { + script.setNextAggregationValue(value.utf8ToString()); + Object run = script.execute(); + return new BytesRef(run.toString()); + } } From 575d5378bdcd6ee7e738244dbbaad5abb7e2b0d8 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 10 Mar 2020 10:22:47 +0100 Subject: [PATCH 2/3] fix tests --- .../query/functionscore/FunctionScoreTests.java | 5 +++++ .../bucket/range/BinaryRangeAggregatorTests.java | 15 ++++++++++----- .../aggregations/support/MissingValuesTests.java | 5 +++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/query/functionscore/FunctionScoreTests.java b/server/src/test/java/org/elasticsearch/index/query/functionscore/FunctionScoreTests.java index f72a13d382a80..8b75b08175493 100644 --- a/server/src/test/java/org/elasticsearch/index/query/functionscore/FunctionScoreTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/functionscore/FunctionScoreTests.java @@ -119,6 +119,11 @@ public int docValueCount() { public BytesRef nextValue() { return new BytesRef("0"); } + + @Override + public BytesRef normalizeValue(BytesRef value) { + return value; + } }; } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/BinaryRangeAggregatorTests.java 
b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/BinaryRangeAggregatorTests.java index 6ed2c1a3a8c9e..9e6b880225e53 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/BinaryRangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/BinaryRangeAggregatorTests.java @@ -18,10 +18,7 @@ */ package org.elasticsearch.search.aggregations.bucket.range; -import java.io.IOException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; +import com.carrotsearch.hppc.LongHashSet; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.TestUtil; @@ -32,7 +29,10 @@ import org.elasticsearch.search.aggregations.bucket.range.BinaryRangeAggregator.SortedSetRangeLeafCollector; import org.elasticsearch.test.ESTestCase; -import com.carrotsearch.hppc.LongHashSet; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; public class BinaryRangeAggregatorTests extends ESTestCase { @@ -169,6 +169,11 @@ public BytesRef nextValue() { return terms[(int) ords[i++]]; } + @Override + public BytesRef normalizeValue(BytesRef value) { + return value; + } + } private void doTestSortedBinaryRangeLeafCollector(int maxNumValuesPerDoc) throws Exception { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/support/MissingValuesTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/support/MissingValuesTests.java index fb18cd9903235..2f5045014f906 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/support/MissingValuesTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/support/MissingValuesTests.java @@ -73,6 +73,11 @@ public boolean advanceExact(int docId) { public int docValueCount() { return values[doc].length; } + + @Override + public BytesRef normalizeValue(BytesRef value) { + return value; + } }; final BytesRef missing = new 
BytesRef(RandomStrings.randomAsciiOfLength(random(), 2)); SortedBinaryDocValues withMissingReplaced = MissingValues.replaceMissing(asBinaryValues, missing); From b777fbd71c3cf05fe1e3e4d3b94c4e7f10f2935d Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Tue, 10 Mar 2020 10:51:44 +0100 Subject: [PATCH 3/3] fix one more test --- .../java/org/elasticsearch/search/MultiValueModeTests.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/search/MultiValueModeTests.java b/server/src/test/java/org/elasticsearch/search/MultiValueModeTests.java index cd75f3695cb3c..f96f2c0c9ae76 100644 --- a/server/src/test/java/org/elasticsearch/search/MultiValueModeTests.java +++ b/server/src/test/java/org/elasticsearch/search/MultiValueModeTests.java @@ -524,6 +524,11 @@ public boolean advanceExact(int doc) { public int docValueCount() { return array[doc].length; } + + @Override + public BytesRef normalizeValue(BytesRef value) { + return value; + } }; verifySortedBinary(multiValues, numDocs); final FixedBitSet rootDocs = randomRootDocs(numDocs);