Skip to content

Commit

Permalink
Script: keyword fields api support (#81266)
Browse files Browse the repository at this point in the history
* Script: keyword fields api support

Adds basic support for keyword fields in the scripting fields api.

Refs: #79105
  • Loading branch information
stu-elastic committed Dec 2, 2021
1 parent d031cfd commit e7d6706
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 6 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/81266.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 81266
summary: "Script: keyword fields api support"
area: Infra/Scripting
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,8 @@ class org.elasticsearch.script.field.ShortDocValuesField @dynamic_type {
short get(int)
short get(int, int)
}

# Keyword field accessors exposed to scripts: get(default) and get(index, default).
class org.elasticsearch.script.field.KeywordDocValuesField @dynamic_type {
String get(String)
String get(int, String)
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ setup:
body:
rank: 3
boolean: [true, false, true]
keyword: ["one string", "another string"]
long: [1152921504606846976, 576460752303423488]
integer: [5, 17, 29]
short: [6, 18, 30, 45]
Expand Down Expand Up @@ -370,6 +371,59 @@ setup:
source: "doc['keyword'].value"
- match: { hits.hits.0.fields.field.0: "not split at all" }

- do:
search:
rest_total_hits_as_int: true
body:
sort: [ { rank: asc } ]
script_fields:
field:
script:
source: "field('keyword').get('missing')"
- match: { hits.hits.0.fields.field.0: "not split at all" }
- match: { hits.hits.1.fields.field.0: "missing" }
- match: { hits.hits.2.fields.field.0: "another string" } # doc values are sorted

- do:
search:
rest_total_hits_as_int: true
body:
sort: [ { rank: asc } ]
script_fields:
field:
script:
source: "/* avoid yaml stash */ $('keyword', 'missing')"
# same as `field('keyword').get('missing')`
- match: { hits.hits.0.fields.field.0: "not split at all" }
- match: { hits.hits.1.fields.field.0: "missing" }
- match: { hits.hits.2.fields.field.0: "another string" }

- do:
search:
rest_total_hits_as_int: true
body:
sort: [ { rank: asc } ]
script_fields:
field:
script:
source: "field('keyword').get(1, 'dne')"
- match: { hits.hits.0.fields.field.0: "dne" }
- match: { hits.hits.1.fields.field.0: "dne" }
- match: { hits.hits.2.fields.field.0: "one string" } # doc values are sorted

- do:
search:
rest_total_hits_as_int: true
body:
sort: [ { rank: asc } ]
script_fields:
field:
script:
source: "String.join(', ', field('keyword'))"
- match: { hits.hits.0.fields.field.0: "not split at all" }
- match: { hits.hits.1.fields.field.0: "" }
- match: { hits.hits.2.fields.field.0: "another string, one string" }

---
"long":
- skip:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,13 @@
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptCompiler;
import org.elasticsearch.script.StringFieldScript;
import org.elasticsearch.script.field.DelegateDocValuesField;
import org.elasticsearch.script.field.KeywordDocValuesField;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.lookup.FieldValues;
import org.elasticsearch.search.lookup.SearchLookup;
Expand Down Expand Up @@ -406,10 +405,7 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S
return new SortedSetOrdinalsIndexFieldData.Builder(
name(),
CoreValuesSourceType.KEYWORD,
(dv, n) -> new DelegateDocValuesField(
new ScriptDocValues.Strings(new ScriptDocValues.StringsSupplier(FieldData.toString(dv))),
n
)
(dv, n) -> new KeywordDocValuesField(FieldData.toString(dv), n)
);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.script.field;

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;

import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * Exposes the keyword doc values of one field to scripts, both through the
 * fields API ({@link #get(String)}, {@link #get(int, String)}, iteration) and,
 * via {@link #getScriptDocValues()}, through the old-style "doc" access.
 *
 * <p>The values of the current document are copied into an internal buffer by
 * {@link #setNextDocId(int)}; every accessor reads from that buffer.
 */
public class KeywordDocValuesField implements DocValuesField<String>, ScriptDocValues.Supplier<String> {

    private final SortedBinaryDocValues input;
    private final String name;

    // Reusable per-document buffers; only the first `count` slots hold values of the current document.
    private BytesRefBuilder[] values = new BytesRefBuilder[0];
    private int count;

    // Created on first use: backwards-compatible delegate for old-style "doc" access.
    private ScriptDocValues.Strings strings;

    /**
     * @param input doc values to read from
     * @param name  name reported by {@link #getName()}
     */
    public KeywordDocValuesField(SortedBinaryDocValues input, String name) {
        this.input = input;
        this.name = name;
    }

    /**
     * Loads the values of {@code docId} into the internal buffer, or empties
     * the buffer when the document has no value for this field.
     *
     * @throws IOException propagated from the underlying doc values
     */
    @Override
    public void setNextDocId(int docId) throws IOException {
        if (input.advanceExact(docId) == false) {
            resize(0);
            return;
        }

        resize(input.docValueCount());
        for (int slot = 0; slot < count; slot++) {
            // Copy rather than keep the reference: BytesBinaryDVLeafFieldData's SortedBinaryDocValues
            // implementation reuses the BytesRef it returns, so storing it directly would leave every
            // slot pointing at the same instance.
            values[slot].copyBytes(input.nextValue());
        }
    }

    /**
     * Records {@code newSize} as the current value count and grows the buffer
     * array when it is too small, giving each added slot its own builder.
     */
    private void resize(int newSize) {
        count = newSize;
        assert count >= 0 : "size must be positive (got " + count + "): likely integer overflow?";
        if (newSize <= values.length) {
            return;
        }

        final int firstNewSlot = values.length;
        values = ArrayUtil.grow(values, count);
        // Populate every slot of the grown array, not just the ones needed right now.
        for (int slot = firstNewSlot; slot < values.length; ++slot) {
            values[slot] = new BytesRefBuilder();
        }
    }

    /**
     * Returns the old-style "doc" view backed by this field, creating it on
     * the first call and reusing it afterwards.
     */
    @Override
    public ScriptDocValues<String> getScriptDocValues() {
        if (strings != null) {
            return strings;
        }
        strings = new ScriptDocValues.Strings(this);
        return strings;
    }

    /**
     * Supplies the String value at {@code index} for the old-style "doc"
     * access in ScriptDocValues. No range check against {@link #size()} is
     * performed here.
     */
    @Override
    public String getInternal(int index) {
        return valueAt(index);
    }

    /** Converts stored bytes to the String handed to scripts (UTF-8 decoding). */
    protected String bytesToString(BytesRef bytesRef) {
        return bytesRef.utf8ToString();
    }

    // Decodes the buffered value in the given slot.
    private String valueAt(int slot) {
        return bytesToString(values[slot].toBytesRef());
    }

    @Override
    public String getName() {
        return name;
    }

    /** @return {@code true} when the current document has no values */
    @Override
    public boolean isEmpty() {
        return count == 0;
    }

    /** @return number of values held for the current document */
    @Override
    public int size() {
        return count;
    }

    /**
     * @return the first value of the current document, or {@code defaultValue}
     *         when there is none
     */
    public String get(String defaultValue) {
        return get(0, defaultValue);
    }

    /**
     * @return the value at {@code index}, or {@code defaultValue} when the
     *         field is empty or {@code index} is outside {@code [0, size())}
     */
    public String get(int index, String defaultValue) {
        final boolean withinBounds = index >= 0 && index < count;
        if (isEmpty() == false && withinBounds) {
            return valueAt(index);
        }
        return defaultValue;
    }

    /** @return an iterator over the values buffered for the current document */
    @Override
    public Iterator<String> iterator() {
        return new ValueIterator();
    }

    // Walks the buffered values in slot order; reads the enclosing field's state directly.
    private final class ValueIterator implements Iterator<String> {
        private int cursor = 0;

        @Override
        public boolean hasNext() {
            return cursor < count;
        }

        @Override
        public String next() {
            if (hasNext() == false) {
                throw new NoSuchElementException();
            }
            final String current = valueAt(cursor);
            cursor++;
            return current;
        }
    }
}

0 comments on commit e7d6706

Please sign in to comment.