Script: keyword fields api support (#81266)

* Script: keyword fields api support. Adds basic support for keyword fields in the scripting fields API. Refs: #79105
elastic · Dec 2, 2021 · e7d6706 · e7d6706
1 parent d031cfd
commit e7d6706
Show file tree

Hide file tree

Showing 5 changed files with 197 additions and 6 deletions.
diff --git a/docs/changelog/81266.yaml b/docs/changelog/81266.yaml
@@ -0,0 +1,5 @@
+pr: 81266
+summary: "Script: keyword fields api support"
+area: Infra/Scripting
+type: enhancement
+issues: []
diff --git a/...ainless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.fields.txt b/...ainless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.fields.txt
@@ -67,3 +67,8 @@ class org.elasticsearch.script.field.ShortDocValuesField @dynamic_type {
   short get(int)
   short get(int, int)
 }
+
+# Keyword field for the scripting fields api; both getters take a default that is returned when no value exists at the requested position
+class org.elasticsearch.script.field.KeywordDocValuesField @dynamic_type {
+  String get(String)
+  String get(int, String)
+}
diff --git a/...-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/50_script_doc_values.yml b/...-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/50_script_doc_values.yml
@@ -76,6 +76,7 @@ setup:
           body:
               rank: 3
               boolean: [true, false, true]
+              keyword: ["one string", "another string"]
               long: [1152921504606846976, 576460752303423488]
               integer: [5, 17, 29]
               short: [6, 18, 30, 45]
@@ -370,6 +371,59 @@ setup:
                             source: "doc['keyword'].value"
     - match: { hits.hits.0.fields.field.0: "not split at all" }
 
+    - do:
+        search:
+          rest_total_hits_as_int: true
+          body:
+            sort: [ { rank: asc } ]
+            script_fields:
+              field:
+                script:
+                  source: "field('keyword').get('missing')"
+    - match: { hits.hits.0.fields.field.0: "not split at all" }
+    - match: { hits.hits.1.fields.field.0: "missing" }
+    - match: { hits.hits.2.fields.field.0: "another string" } # doc values are sorted
+
+    - do:
+        search:
+          rest_total_hits_as_int: true
+          body:
+            sort: [ { rank: asc } ]
+            script_fields:
+              field:
+                script:
+                  source: "/* avoid yaml stash */ $('keyword', 'missing')"
+    # same as `field('keyword').get('missing')`
+    - match: { hits.hits.0.fields.field.0: "not split at all" }
+    - match: { hits.hits.1.fields.field.0: "missing" }
+    - match: { hits.hits.2.fields.field.0: "another string" }
+
+    - do:
+        search:
+          rest_total_hits_as_int: true
+          body:
+            sort: [ { rank: asc } ]
+            script_fields:
+              field:
+                script:
+                  source: "field('keyword').get(1, 'dne')"
+    - match: { hits.hits.0.fields.field.0: "dne" }
+    - match: { hits.hits.1.fields.field.0: "dne" }
+    - match: { hits.hits.2.fields.field.0: "one string" } # doc values are sorted
+
+    - do:
+        search:
+          rest_total_hits_as_int: true
+          body:
+            sort: [ { rank: asc } ]
+            script_fields:
+              field:
+                script:
+                  source: "String.join(', ', field('keyword'))"
+    - match: { hits.hits.0.fields.field.0: "not split at all" }
+    - match: { hits.hits.1.fields.field.0: "" }
+    - match: { hits.hits.2.fields.field.0: "another string, one string" }
+
 ---
 "long":
     - skip:

diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -36,14 +36,13 @@
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.fielddata.FieldData;
 import org.elasticsearch.index.fielddata.IndexFieldData;
-import org.elasticsearch.index.fielddata.ScriptDocValues;
 import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.similarity.SimilarityProvider;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptCompiler;
 import org.elasticsearch.script.StringFieldScript;
-import org.elasticsearch.script.field.DelegateDocValuesField;
+import org.elasticsearch.script.field.KeywordDocValuesField;
 import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
 import org.elasticsearch.search.lookup.FieldValues;
 import org.elasticsearch.search.lookup.SearchLookup;
@@ -406,10 +405,7 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S
             return new SortedSetOrdinalsIndexFieldData.Builder(
                 name(),
                 CoreValuesSourceType.KEYWORD,
-                (dv, n) -> new DelegateDocValuesField(
-                    new ScriptDocValues.Strings(new ScriptDocValues.StringsSupplier(FieldData.toString(dv))),
-                    n
-                )
+                (dv, n) -> new KeywordDocValuesField(FieldData.toString(dv), n)
             );
         }
 

diff --git a/server/src/main/java/org/elasticsearch/script/field/KeywordDocValuesField.java b/server/src/main/java/org/elasticsearch/script/field/KeywordDocValuesField.java
@@ -0,0 +1,131 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.script.field;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Scripting field backed by the binary doc values of a keyword field.
+ *
+ * <p>On every call to {@link #setNextDocId(int)} the values of the selected document are copied
+ * into reusable buffers; they are decoded to {@link String} lazily, on access. The class also acts
+ * as the {@link ScriptDocValues.Supplier} behind the old-style "doc" access returned by
+ * {@link #getScriptDocValues()}.
+ */
+public class KeywordDocValuesField implements DocValuesField<String>, ScriptDocValues.Supplier<String> {
+
+    private final SortedBinaryDocValues input;
+    private final String name;
+
+    // Reusable buffers. Only the first "count" slots belong to the current document; slots beyond
+    // that may still hold bytes copied for an earlier document and must never be exposed.
+    private BytesRefBuilder[] values = new BytesRefBuilder[0];
+    private int count;
+
+    // used for backwards compatibility for old-style "doc" access
+    // as a delegate to this field class
+    private ScriptDocValues.Strings strings = null;
+
+    /**
+     * @param input doc values to read the keyword bytes from
+     * @param name  name reported by {@link #getName()}
+     */
+    public KeywordDocValuesField(SortedBinaryDocValues input, String name) {
+        this.input = input;
+        this.name = name;
+    }
+
+    /**
+     * Loads the values of the given document, replacing those of the previous one.
+     * A document without values leaves the field empty.
+     *
+     * @param docId document to position on
+     * @throws IOException if the underlying doc values cannot be read
+     */
+    @Override
+    public void setNextDocId(int docId) throws IOException {
+        if (input.advanceExact(docId)) {
+            resize(input.docValueCount());
+            for (int i = 0; i < count; i++) {
+                // We need to make a copy here, because BytesBinaryDVLeafFieldData's SortedBinaryDocValues
+                // implementation reuses the returned BytesRef. Otherwise we would end up with the same BytesRef
+                // instance for all slots in the values array.
+                values[i].copyBytes(input.nextValue());
+            }
+        } else {
+            resize(0);
+        }
+    }
+
+    /**
+     * Sets the number of live values and grows the buffer array when it is too small.
+     * The array is never shrunk, so existing builders are reused across documents.
+     */
+    private void resize(int newSize) {
+        count = newSize;
+        assert count >= 0 : "size must be positive (got " + count + "): likely integer overflow?";
+        if (newSize > values.length) {
+            final int oldLength = values.length;
+            values = ArrayUtil.grow(values, count);
+            // only the newly added slots need a builder; the old ones were carried over by grow
+            for (int i = oldLength; i < values.length; ++i) {
+                values[i] = new BytesRefBuilder();
+            }
+        }
+    }
+
+    /**
+     * Returns the old-style "doc" view of this field, created on first use and cached afterwards.
+     */
+    @Override
+    public ScriptDocValues<String> getScriptDocValues() {
+        if (strings == null) {
+            strings = new ScriptDocValues.Strings(this);
+        }
+
+        return strings;
+    }
+
+    /**
+     * Returns the value at {@code index} for the current document. This method is required to
+     * support the String return values for the old-style "doc" access in ScriptDocValues.
+     *
+     * @param index position of the value, {@code 0 <= index < size()}
+     * @return the decoded value
+     * @throws IndexOutOfBoundsException if {@code index} is outside the current document's values
+     */
+    @Override
+    public String getInternal(int index) {
+        // The backing array can be longer than the current document's value count, so an
+        // unchecked read could hand out a value copied for a previous document.
+        if (index < 0 || index >= count) {
+            throw new IndexOutOfBoundsException(
+                "index [" + index + "] is out of bounds for field [" + name + "] with [" + count + "] values"
+            );
+        }
+        return bytesToString(values[index].toBytesRef());
+    }
+
+    /**
+     * Decodes the raw bytes of one value; this implementation reads them as UTF-8.
+     */
+    protected String bytesToString(BytesRef bytesRef) {
+        return bytesRef.utf8ToString();
+    }
+
+    @Override
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * @return {@code true} if the current document has no values
+     */
+    @Override
+    public boolean isEmpty() {
+        return count == 0;
+    }
+
+    /**
+     * @return number of values of the current document
+     */
+    @Override
+    public int size() {
+        return count;
+    }
+
+    /**
+     * Returns the first value of the current document, or {@code defaultValue} if there is none.
+     */
+    public String get(String defaultValue) {
+        return get(0, defaultValue);
+    }
+
+    /**
+     * Returns the value at {@code index}, or {@code defaultValue} if the current document has no
+     * value at that position (including a negative {@code index}).
+     */
+    public String get(int index, String defaultValue) {
+        // an empty field has count == 0, which the upper-bound test already covers
+        if (index < 0 || index >= count) {
+            return defaultValue;
+        }
+
+        return bytesToString(values[index].toBytesRef());
+    }
+
+    /**
+     * Iterates over the values of the current document in stored order. The iterator reads the
+     * field's state directly rather than taking a snapshot of it.
+     */
+    @Override
+    public Iterator<String> iterator() {
+        return new Iterator<String>() {
+            private int index = 0;
+
+            @Override
+            public boolean hasNext() {
+                return index < count;
+            }
+
+            @Override
+            public String next() {
+                if (hasNext() == false) {
+                    throw new NoSuchElementException();
+                }
+                return bytesToString(values[index++].toBytesRef());
+            }
+        };
+    }
+}