From 601faf6a980171be13cda60d68821f2bedf36a1e Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Fri, 1 Nov 2019 15:48:17 +0200 Subject: [PATCH] [7.5][ML] Prevent fetching multi-field from source (#48770) (#48798) Aggregatable mutli-fields are at the moment wrongly mapped as normal doc_value fields and thus they support fetching from source. However, they do not exist in the source. This results to failure to extract such fields. This commit fixes this bug. While a fix could be worked out on top of the existing code, it is evident the extraction logic has become difficult to understand and maintain. As we also want to deduplicate multi-fields for data frame analytics, it seemed appropriate to refactor the code to simplify and better handle the extraction of multi-fields. Relates #48756 Backport of #48770 --- .../ml/integration/DatafeedJobsRestIT.java | 21 +- .../extractor/scroll/ScrollDataExtractor.java | 2 +- .../scroll/SearchHitToJsonProcessor.java | 2 +- .../scroll/TimeBasedExtractedFields.java | 19 +- .../extractor/DataFrameDataExtractor.java | 6 +- .../extractor/ExtractedFieldsDetector.java | 39 +- .../xpack/ml/extractor/AbstractField.java | 49 +++ .../xpack/ml/extractor/DocValueField.java | 48 +++ .../xpack/ml/extractor/ExtractedField.java | 352 +++--------------- .../xpack/ml/extractor/ExtractedFields.java | 120 ++++-- .../xpack/ml/extractor/GeoPointField.java | 55 +++ .../xpack/ml/extractor/GeoShapeField.java | 84 +++++ .../xpack/ml/extractor/MultiField.java | 80 ++++ .../xpack/ml/extractor/ScriptField.java | 42 +++ .../xpack/ml/extractor/SourceField.java | 74 ++++ .../xpack/ml/extractor/TimeField.java | 73 ++++ .../scroll/ScrollDataExtractorTests.java | 8 +- .../scroll/SearchHitToJsonProcessorTests.java | 26 +- .../scroll/TimeBasedExtractedFieldsTests.java | 39 +- .../DataFrameDataExtractorTests.java | 29 +- .../ExtractedFieldsDetectorTests.java | 28 +- .../ml/extractor/DocValueFieldTests.java | 79 ++++ .../ml/extractor/ExtractedFieldTests.java | 194 ---------- .../ml/extractor/ExtractedFieldsTests.java | 68 ++-- .../ml/extractor/GeoPointFieldTests.java | 39 ++ .../ml/extractor/GeoShapeFieldTests.java | 62 +++ .../xpack/ml/extractor/MultiFieldTests.java | 50 +++ .../xpack/ml/extractor/ScriptFieldTests.java | 64 ++++ .../xpack/ml/extractor/SourceFieldTests.java | 70 ++++ .../xpack/ml/extractor/TimeFieldTests.java | 67 ++++ 30 files changed, 1214 insertions(+), 675 deletions(-) create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/AbstractField.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/DocValueField.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoPointField.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoShapeField.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/MultiField.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ScriptField.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/SourceField.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/TimeField.java create mode 100644 x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/DocValueFieldTests.java delete mode 100644 x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldTests.java create mode 100644 x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoPointFieldTests.java create mode 100644 x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoShapeFieldTests.java create mode 100644 x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/MultiFieldTests.java create mode 100644 x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ScriptFieldTests.java create mode 100644 x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/SourceFieldTests.java create mode 100644 x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/TimeFieldTests.java diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobsRestIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobsRestIT.java index 114f1ecd0aa83..eca06e90344b6 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobsRestIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobsRestIT.java @@ -55,7 +55,7 @@ protected boolean preserveTemplatesUponCompletion() { return true; } - private void setupDataAccessRole(String index) throws IOException { + private static void setupDataAccessRole(String index) throws IOException { Request request = new Request("PUT", "/_security/role/test_data_access"); request.setJsonEntity("{" + " \"indices\" : [" @@ -283,10 +283,12 @@ public void testLookbackOnlyWithSourceDisabled() throws Exception { new LookbackOnlyTestHelper("test-lookback-only-with-source-disabled", "airline-data-disabled-source").execute(); } - @AwaitsFix(bugUrl = "This test uses painless which is not available in the integTest phase") public void testLookbackOnlyWithScriptFields() throws Exception { - new LookbackOnlyTestHelper("test-lookback-only-with-script-fields", "airline-data-disabled-source") - .setAddScriptedFields(true).execute(); + new LookbackOnlyTestHelper("test-lookback-only-with-script-fields", "airline-data") + .setScriptedFields( + "{\"scripted_airline\":{\"script\":{\"lang\":\"painless\",\"source\":\"doc['airline.keyword'].value\"}}}") + .setAirlineVariant("scripted_airline") + .execute(); } public void testLookbackOnlyWithNestedFields() throws Exception { @@ -1088,7 +1090,7 @@ private class LookbackOnlyTestHelper { private String jobId; private String airlineVariant; private String dataIndex; - private boolean addScriptedFields; + private String scriptedFields; private boolean shouldSucceedInput; private boolean shouldSucceedProcessing; @@ -1100,8 +1102,8 @@ private class LookbackOnlyTestHelper { this.airlineVariant = "airline"; } - public LookbackOnlyTestHelper setAddScriptedFields(boolean value) { - addScriptedFields = value; + public LookbackOnlyTestHelper setScriptedFields(String scriptFields) { + this.scriptedFields = scriptFields; return this; } @@ -1124,10 +1126,7 @@ public LookbackOnlyTestHelper setShouldSucceedProcessing(boolean value) { public void execute() throws Exception { createJob(jobId, airlineVariant); String datafeedId = "datafeed-" + jobId; - new DatafeedBuilder(datafeedId, jobId, dataIndex) - .setScriptedFields(addScriptedFields ? - "{\"airline\":{\"script\":{\"lang\":\"painless\",\"inline\":\"doc['airline'].value\"}}}" : null) - .build(); + new DatafeedBuilder(datafeedId, jobId, dataIndex).setScriptedFields(scriptedFields).build(); openJob(client(), jobId); startDatafeedAndWaitUntilStopped(datafeedId); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractor.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractor.java index 0621dc32fc9a8..e0bf14b1bb1b3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractor.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractor.java @@ -130,7 +130,7 @@ private SearchRequestBuilder buildSearchRequest(long start) { context.query, context.extractedFields.timeField(), start, context.end)); for (ExtractedField docValueField : context.extractedFields.getDocValueFields()) { - searchRequestBuilder.addDocValueField(docValueField.getName(), docValueField.getDocValueFormat()); + searchRequestBuilder.addDocValueField(docValueField.getSearchField(), docValueField.getDocValueFormat()); } String[] sourceFields = context.extractedFields.getSourceFields(); if (sourceFields.length == 0) { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessor.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessor.java index 8838954a149b4..357ac8ee900af 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessor.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessor.java @@ -29,7 +29,7 @@ class SearchHitToJsonProcessor implements Releasable { public void process(SearchHit hit) throws IOException { jsonBuilder.startObject(); for (ExtractedField field : fields.getAllFields()) { - writeKeyValue(field.getAlias(), field.value(hit)); + writeKeyValue(field.getName(), field.value(hit)); } jsonBuilder.endObject(); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFields.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFields.java index cd058b32c6f04..f46cc9f216fc3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFields.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFields.java @@ -14,7 +14,6 @@ import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Set; @@ -42,13 +41,13 @@ public String timeField() { public Long timeFieldValue(SearchHit hit) { Object[] value = timeField.value(hit); if (value.length != 1) { - throw new RuntimeException("Time field [" + timeField.getAlias() + "] expected a single value; actual was: " + throw new RuntimeException("Time field [" + timeField.getName() + "] expected a single value; actual was: " + Arrays.toString(value)); } if (value[0] instanceof Long) { return (Long) value[0]; } - throw new RuntimeException("Time field [" + timeField.getAlias() + "] expected a long value; actual was: " + value[0]); + throw new RuntimeException("Time field [" + timeField.getName() + "] expected a long value; actual was: " + value[0]); } public static TimeBasedExtractedFields build(Job job, DatafeedConfig datafeed, FieldCapabilitiesResponse fieldsCapabilities) { @@ -58,20 +57,18 @@ public static TimeBasedExtractedFields build(Job job, DatafeedConfig datafeed, F if (scriptFields.contains(timeField) == false && extractionMethodDetector.isAggregatable(timeField) == false) { throw new IllegalArgumentException("cannot retrieve time field [" + timeField + "] because it is not aggregatable"); } - ExtractedField timeExtractedField = extractedTimeField(timeField, scriptFields, fieldsCapabilities); + ExtractedField timeExtractedField = extractedTimeField(timeField, scriptFields); List remainingFields = job.allInputFields().stream().filter(f -> !f.equals(timeField)).collect(Collectors.toList()); List allExtractedFields = new ArrayList<>(remainingFields.size() + 1); allExtractedFields.add(timeExtractedField); remainingFields.stream().forEach(field -> allExtractedFields.add(extractionMethodDetector.detect(field))); + return new TimeBasedExtractedFields(timeExtractedField, allExtractedFields); } - private static ExtractedField extractedTimeField(String timeField, Set scriptFields, - FieldCapabilitiesResponse fieldCapabilities) { - if (scriptFields.contains(timeField)) { - return ExtractedField.newTimeField(timeField, Collections.emptySet(), ExtractedField.ExtractionMethod.SCRIPT_FIELD); - } - return ExtractedField.newTimeField(timeField, fieldCapabilities.getField(timeField).keySet(), - ExtractedField.ExtractionMethod.DOC_VALUE); + private static ExtractedField extractedTimeField(String timeField, Set scriptFields) { + ExtractedField.Method method = scriptFields.contains(timeField) ? ExtractedField.Method.SCRIPT_FIELD + : ExtractedField.Method.DOC_VALUE; + return ExtractedFields.newTimeField(timeField, method); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractor.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractor.java index 46857593fc488..41b8f8293fcc5 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractor.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractor.java @@ -24,8 +24,8 @@ import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.xpack.core.ClientHelper; import org.elasticsearch.xpack.core.ml.dataframe.analyses.DataFrameAnalysis; -import org.elasticsearch.xpack.ml.extractor.ExtractedField; import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex; +import org.elasticsearch.xpack.ml.extractor.ExtractedField; import java.io.IOException; import java.util.ArrayList; @@ -138,7 +138,7 @@ private SearchRequestBuilder buildSearchRequest() { setFetchSource(searchRequestBuilder); for (ExtractedField docValueField : context.extractedFields.getDocValueFields()) { - searchRequestBuilder.addDocValueField(docValueField.getName(), docValueField.getDocValueFormat()); + searchRequestBuilder.addDocValueField(docValueField.getSearchField(), docValueField.getDocValueFormat()); } return searchRequestBuilder; @@ -231,7 +231,7 @@ private void clearScroll(String scrollId) { } public List getFieldNames() { - return context.extractedFields.getAllFields().stream().map(ExtractedField::getAlias).collect(Collectors.toList()); + return context.extractedFields.getAllFields().stream().map(ExtractedField::getName).collect(Collectors.toList()); } public DataSummary collectDataSummary() { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java index 1615f2fc8b1f9..6ed1ea62fe8a4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java @@ -11,11 +11,9 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.document.DocumentField; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.BooleanFieldMapper; -import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest; @@ -24,9 +22,9 @@ import org.elasticsearch.xpack.core.ml.job.messages.Messages; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import org.elasticsearch.xpack.core.ml.utils.NameResolver; +import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex; import org.elasticsearch.xpack.ml.extractor.ExtractedField; import org.elasticsearch.xpack.ml.extractor.ExtractedFields; -import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex; import java.util.ArrayList; import java.util.Arrays; @@ -264,13 +262,13 @@ private ExtractedFields fetchBooleanFieldsAsIntegers(ExtractedFields extractedFi List adjusted = new ArrayList<>(extractedFields.getAllFields().size()); for (ExtractedField field : extractedFields.getAllFields()) { if (isBoolean(field.getTypes())) { - if (config.getAnalysis().getAllowedCategoricalTypes(field.getAlias()).contains(BooleanFieldMapper.CONTENT_TYPE)) { + if (config.getAnalysis().getAllowedCategoricalTypes(field.getName()).contains(BooleanFieldMapper.CONTENT_TYPE)) { // We convert boolean field to string if it is a categorical dependent variable - adjusted.add(new BooleanMapper<>(field, Boolean.TRUE.toString(), Boolean.FALSE.toString())); + adjusted.add(ExtractedFields.applyBooleanMapping(field, Boolean.TRUE.toString(), Boolean.FALSE.toString())); } else { // We convert boolean fields to integers with values 0, 1 as this is the preferred // way to consume such features in the analytics process. - adjusted.add(new BooleanMapper<>(field, 1, 0)); + adjusted.add(ExtractedFields.applyBooleanMapping(field, 1, 0)); } } else { adjusted.add(field); @@ -282,33 +280,4 @@ private ExtractedFields fetchBooleanFieldsAsIntegers(ExtractedFields extractedFi private static boolean isBoolean(Set types) { return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE); } - - /** - * {@link BooleanMapper} makes boolean field behave as a field of different type. - */ - private static final class BooleanMapper extends ExtractedField { - - private final T trueValue; - private final T falseValue; - - BooleanMapper(ExtractedField field, T trueValue, T falseValue) { - super(field.getAlias(), field.getName(), Collections.singleton(BooleanFieldMapper.CONTENT_TYPE), ExtractionMethod.DOC_VALUE); - this.trueValue = trueValue; - this.falseValue = falseValue; - } - - @Override - public Object[] value(SearchHit hit) { - DocumentField keyValue = hit.field(name); - if (keyValue != null) { - return keyValue.getValues().stream().map(v -> Boolean.TRUE.equals(v) ? trueValue : falseValue).toArray(); - } - return new Object[0]; - } - - @Override - public boolean supportsFromSource() { - return false; - } - } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/AbstractField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/AbstractField.java new file mode 100644 index 0000000000000..86ac35a614df5 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/AbstractField.java @@ -0,0 +1,49 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.common.document.DocumentField; +import org.elasticsearch.search.SearchHit; + +import java.util.List; +import java.util.Objects; +import java.util.Set; + +abstract class AbstractField implements ExtractedField { + + private final String name; + + private final Set types; + + AbstractField(String name, Set types) { + this.name = Objects.requireNonNull(name); + this.types = Objects.requireNonNull(types); + } + + @Override + public String getName() { + return name; + } + + @Override + public String getSearchField() { + return name; + } + + @Override + public Set getTypes() { + return types; + } + + protected Object[] getFieldValue(SearchHit hit) { + DocumentField keyValue = hit.field(getSearchField()); + if (keyValue != null) { + List values = keyValue.getValues(); + return values.toArray(new Object[0]); + } + return new Object[0]; + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/DocValueField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/DocValueField.java new file mode 100644 index 0000000000000..b8683fe120e35 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/DocValueField.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.search.SearchHit; + +import java.util.Set; + +public class DocValueField extends AbstractField { + + public DocValueField(String name, Set types) { + super(name, types); + } + + @Override + public Method getMethod() { + return Method.DOC_VALUE; + } + + @Override + public Object[] value(SearchHit hit) { + return getFieldValue(hit); + } + + @Override + public boolean supportsFromSource() { + return true; + } + + @Override + public ExtractedField newFromSource() { + return new SourceField(getSearchField(), getTypes()); + } + + @Override + public boolean isMultiField() { + return false; + } + + @Nullable + public String getDocValueFormat() { + return null; + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedField.java index c47b77d2ddc11..60a1f4883d120 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedField.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedField.java @@ -5,305 +5,77 @@ */ package org.elasticsearch.xpack.ml.extractor; -import org.elasticsearch.common.document.DocumentField; -import org.elasticsearch.geometry.Geometry; -import org.elasticsearch.geometry.Point; -import org.elasticsearch.geometry.ShapeType; -import org.elasticsearch.geometry.utils.StandardValidator; -import org.elasticsearch.geometry.utils.WellKnownText; import org.elasticsearch.search.SearchHit; -import java.io.IOException; -import java.text.ParseException; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; import java.util.Set; /** - * Represents a field to be extracted by the datafeed. - * It encapsulates the extraction logic. + * Describes how to extract an analyzed field */ -public abstract class ExtractedField { +public interface ExtractedField { - public enum ExtractionMethod { + enum Method { SOURCE, DOC_VALUE, SCRIPT_FIELD } - /** The name of the field as configured in the job */ - protected final String alias; - - /** The name of the field we extract */ - protected final String name; - - private final Set types; - - private final ExtractionMethod extractionMethod; - - protected ExtractedField(String alias, String name, Set types, ExtractionMethod extractionMethod) { - this.alias = Objects.requireNonNull(alias); - this.name = Objects.requireNonNull(name); - this.types = Objects.requireNonNull(types); - this.extractionMethod = Objects.requireNonNull(extractionMethod); - } - - public String getAlias() { - return alias; - } - - public String getName() { - return name; - } - - public Set getTypes() { - return types; - } - - public ExtractionMethod getExtractionMethod() { - return extractionMethod; - } - - public abstract Object[] value(SearchHit hit); - - public abstract boolean supportsFromSource(); - - public String getDocValueFormat() { - return null; - } - - public static ExtractedField newTimeField(String name, Set types, ExtractionMethod extractionMethod) { - if (extractionMethod == ExtractionMethod.SOURCE) { - throw new IllegalArgumentException("time field cannot be extracted from source"); - } - return new TimeField(name, types, extractionMethod); - } - - public static ExtractedField newGeoShapeField(String alias, String name) { - return new GeoShapeField(alias, name, Collections.singleton("geo_shape")); - } - - public static ExtractedField newGeoPointField(String alias, String name) { - return new GeoPointField(alias, name, Collections.singleton("geo_point")); - } - - public static ExtractedField newField(String name, Set types, ExtractionMethod extractionMethod) { - return newField(name, name, types, extractionMethod); - } - - public static ExtractedField newField(String alias, String name, Set types, ExtractionMethod extractionMethod) { - switch (extractionMethod) { - case DOC_VALUE: - case SCRIPT_FIELD: - return new FromFields(alias, name, types, extractionMethod); - case SOURCE: - return new FromSource(alias, name, types); - default: - throw new IllegalArgumentException("Invalid extraction method [" + extractionMethod + "]"); - } - } - - public ExtractedField newFromSource() { - if (supportsFromSource()) { - return new FromSource(alias, name, types); - } - throw new IllegalStateException("Field (alias [" + alias + "], name [" + name + "]) should be extracted via [" - + extractionMethod + "] and cannot be extracted from source"); - } - - private static class FromFields extends ExtractedField { - - FromFields(String alias, String name, Set types, ExtractionMethod extractionMethod) { - super(alias, name, types, extractionMethod); - } - - @Override - public Object[] value(SearchHit hit) { - DocumentField keyValue = hit.field(name); - if (keyValue != null) { - List values = keyValue.getValues(); - return values.toArray(new Object[0]); - } - return new Object[0]; - } - - @Override - public boolean supportsFromSource() { - return getExtractionMethod() == ExtractionMethod.DOC_VALUE; - } - } - - private static class GeoShapeField extends FromSource { - private static final WellKnownText wkt = new WellKnownText(true, new StandardValidator(true)); - - GeoShapeField(String alias, String name, Set types) { - super(alias, name, types); - } - - @Override - public Object[] value(SearchHit hit) { - Object[] value = super.value(hit); - if (value.length != 1) { - throw new IllegalStateException("Unexpected values for a geo_shape field: " + Arrays.toString(value)); - } - if (value[0] instanceof String) { - value[0] = handleString((String) value[0]); - } else if (value[0] instanceof Map) { - @SuppressWarnings("unchecked") - Map geoObject = (Map) value[0]; - value[0] = handleObject(geoObject); - } else { - throw new IllegalStateException("Unexpected value type for a geo_shape field: " + value[0].getClass()); - } - return value; - } - - private String handleString(String geoString) { - try { - if (geoString.startsWith("POINT")) { // Entry is of the form "POINT (-77.03653 38.897676)" - Geometry geometry = wkt.fromWKT(geoString); - if (geometry.type() != ShapeType.POINT) { - throw new IllegalArgumentException("Unexpected non-point geo_shape type: " + geometry.type().name()); - } - Point pt = ((Point)geometry); - return pt.getY() + "," + pt.getX(); - } else { - throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString); - } - } catch (IOException | ParseException ex) { - throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString); - } - } - - private String handleObject(Map geoObject) { - String geoType = (String) geoObject.get("type"); - if (geoType != null && "point".equals(geoType.toLowerCase(Locale.ROOT))) { - @SuppressWarnings("unchecked") - List coordinates = (List) geoObject.get("coordinates"); - if (coordinates == null || coordinates.size() != 2) { - throw new IllegalArgumentException("Invalid coordinates for geo_shape point: " + geoObject); - } - return coordinates.get(1) + "," + coordinates.get(0); - } else { - throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoObject); - } - } - - } - - private static class GeoPointField extends FromFields { - - GeoPointField(String alias, String name, Set types) { - super(alias, name, types, ExtractionMethod.DOC_VALUE); - } - - @Override - public Object[] value(SearchHit hit) { - Object[] value = super.value(hit); - if (value.length != 1) { - throw new IllegalStateException("Unexpected values for a geo_point field: " + Arrays.toString(value)); - } - if (value[0] instanceof String) { - value[0] = handleString((String) value[0]); - } else { - throw new IllegalStateException("Unexpected value type for a geo_point field: " + value[0].getClass()); - } - return value; - } - - private String handleString(String geoString) { - if (geoString.contains(",")) { // Entry is of the form "38.897676, -77.03653" - return geoString.replace(" ", ""); - } else { - throw new IllegalArgumentException("Unexpected value for a geo_point field: " + geoString); - } - } - - @Override - public boolean supportsFromSource() { - return false; - } - } - - private static class TimeField extends FromFields { - - private static final String EPOCH_MILLIS_FORMAT = "epoch_millis"; - - TimeField(String name, Set types, ExtractionMethod extractionMethod) { - super(name, name, types, extractionMethod); - } - - @Override - public Object[] value(SearchHit hit) { - Object[] value = super.value(hit); - if (value.length != 1) { - return value; - } - if (value[0] instanceof String) { // doc_value field with the epoch_millis format - value[0] = Long.parseLong((String) value[0]); - } else if (value[0] instanceof Long == false) { // pre-6.0 field - throw new IllegalStateException("Unexpected value for a time field: " + value[0].getClass()); - } - return value; - } - - @Override - public String getDocValueFormat() { - return EPOCH_MILLIS_FORMAT; - } - - @Override - public boolean supportsFromSource() { - return false; - } - } - - private static class FromSource extends ExtractedField { - - private String[] namePath; - - FromSource(String alias, String name, Set types) { - super(alias, name, types, ExtractionMethod.SOURCE); - namePath = name.split("\\."); - } - - @Override - public Object[] value(SearchHit hit) { - Map source = hit.getSourceAsMap(); - int level = 0; - while (source != null && level < namePath.length - 1) { - source = getNextLevel(source, namePath[level]); - level++; - } - if (source != null) { - Object values = source.get(namePath[level]); - if (values != null) { - if (values instanceof List) { - @SuppressWarnings("unchecked") - List asList = (List) values; - return asList.toArray(new Object[0]); - } else { - return new Object[]{values}; - } - } - } - return new Object[0]; - } - - @Override - public boolean supportsFromSource() { - return true; - } - - @SuppressWarnings("unchecked") - private static Map getNextLevel(Map source, String key) { - Object nextLevel = source.get(key); - if (nextLevel instanceof Map) { - return (Map) source.get(key); - } - return null; - } + /** + * @return The name of the field as expected by the user + */ + String getName(); + + /** + * This is the name of the field we should search for. + * In most cases this is the same as {@link #getName()}. + * However, if the field is a non-aggregatable multi-field + * we cannot retrieve it from source. Thus we search for + * its parent instead. + * @return The name of the field that is searched. + */ + String getSearchField(); + + /** + * @return The field types + */ + Set getTypes(); + + /** + * @return The extraction {@link Method} + */ + Method getMethod(); + + /** + * Extracts the value from a {@link SearchHit} + * @param hit the search hit + * @return the extracted value + */ + Object[] value(SearchHit hit); + + /** + * @return Whether the field can be fetched from source instead + */ + boolean supportsFromSource(); + + /** + * @return A new extraction field that's fetching from source + */ + ExtractedField newFromSource(); + + /** + * @return Whether it is a multi-field + */ + boolean isMultiField(); + + /** + * @return The multi-field parent + */ + default String getParentField() { + throw new UnsupportedOperationException(); + } + + /** + * @return The doc_value format + */ + default String getDocValueFormat() { + throw new UnsupportedOperationException(); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java index a5f5b695172a9..9fe079b745c10 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java @@ -7,6 +7,9 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; +import org.elasticsearch.common.document.DocumentField; +import org.elasticsearch.index.mapper.BooleanFieldMapper; +import org.elasticsearch.search.SearchHit; import org.elasticsearch.xpack.core.ml.utils.MlStrings; import java.util.Collection; @@ -22,16 +25,14 @@ */ public class ExtractedFields { - private static final String TEXT = "text"; - private final List allFields; private final List docValueFields; private final String[] sourceFields; public ExtractedFields(List allFields) { this.allFields = Collections.unmodifiableList(allFields); - this.docValueFields = filterFields(ExtractedField.ExtractionMethod.DOC_VALUE, allFields); - this.sourceFields = filterFields(ExtractedField.ExtractionMethod.SOURCE, allFields).stream().map(ExtractedField::getName) + this.docValueFields = filterFields(ExtractedField.Method.DOC_VALUE, allFields); + this.sourceFields = filterFields(ExtractedField.Method.SOURCE, allFields).stream().map(ExtractedField::getSearchField) .toArray(String[]::new); } @@ -47,8 +48,8 @@ public List getDocValueFields() { return docValueFields; } - private static List filterFields(ExtractedField.ExtractionMethod method, List fields) { - return fields.stream().filter(field -> field.getExtractionMethod() == method).collect(Collectors.toList()); + private static List filterFields(ExtractedField.Method method, List fields) { + return fields.stream().filter(field -> field.getMethod() == method).collect(Collectors.toList()); } public static ExtractedFields build(Collection allFields, Set scriptFields, @@ -57,6 +58,14 @@ public static ExtractedFields build(Collection allFields, Set sc return new ExtractedFields(allFields.stream().map(field -> extractionMethodDetector.detect(field)).collect(Collectors.toList())); } + public static TimeField newTimeField(String name, ExtractedField.Method method) { + return new TimeField(name, method); + } + + public static ExtractedField applyBooleanMapping(ExtractedField field, T trueValue, T falseValue) { + return new BooleanMapper<>(field, trueValue, falseValue); + } + public static class ExtractionMethodDetector { private final Set scriptFields; @@ -68,38 +77,37 @@ public ExtractionMethodDetector(Set scriptFields, FieldCapabilitiesRespo } public ExtractedField detect(String field) { - String internalField = field; - ExtractedField.ExtractionMethod method = ExtractedField.ExtractionMethod.SOURCE; - Set types = getTypes(field); if (scriptFields.contains(field)) { - method = ExtractedField.ExtractionMethod.SCRIPT_FIELD; - } else if (isAggregatable(field)) { - method = ExtractedField.ExtractionMethod.DOC_VALUE; - if (isFieldOfType(field, "date")) { - return ExtractedField.newTimeField(field, types, method); - } - } else if (isFieldOfType(field, TEXT)) { - String parentField = MlStrings.getParentField(field); - // Field is text so check if it is a multi-field - if (Objects.equals(parentField, field) == false && fieldsCapabilities.getField(parentField) != null) { - // Field is a multi-field which means it won't be available in source. Let's take the parent instead. - internalField = parentField; - method = isAggregatable(parentField) ? ExtractedField.ExtractionMethod.DOC_VALUE - : ExtractedField.ExtractionMethod.SOURCE; + return new ScriptField(field); + } + ExtractedField extractedField = detectNonScriptField(field); + String parentField = MlStrings.getParentField(field); + if (isMultiField(field, parentField)) { + if (isAggregatable(field)) { + return new MultiField(parentField, extractedField); + } else { + ExtractedField parentExtractionField = detectNonScriptField(parentField); + return new MultiField(field, parentField, parentField, parentExtractionField); } } + return extractedField; + } - if (isFieldOfType(field, "geo_point")) { - if (method != ExtractedField.ExtractionMethod.DOC_VALUE) { + private ExtractedField detectNonScriptField(String field) { + if (isFieldOfType(field, TimeField.TYPE) && isAggregatable(field)) { + return new TimeField(field, ExtractedField.Method.DOC_VALUE); + } + if (isFieldOfType(field, GeoPointField.TYPE)) { + if (isAggregatable(field) == false) { throw new IllegalArgumentException("cannot use [geo_point] field with disabled doc values"); } - return ExtractedField.newGeoPointField(field, internalField); + return new GeoPointField(field); } - if (isFieldOfType(field, "geo_shape")) { - return ExtractedField.newGeoShapeField(field, internalField); + if (isFieldOfType(field, GeoShapeField.TYPE)) { + return new GeoShapeField(field); } - - return ExtractedField.newField(field, internalField, types, method); + Set types = getTypes(field); + return isAggregatable(field) ? new DocValueField(field, types) : new SourceField(field, types); } private Set getTypes(String field) { @@ -127,5 +135,57 @@ private boolean isFieldOfType(String field, String type) { } return false; } + + private boolean isMultiField(String field, String parent) { + if (Objects.equals(field, parent)) { + return false; + } + Map parentFieldCaps = fieldsCapabilities.getField(parent); + if (parentFieldCaps == null || (parentFieldCaps.size() == 1 && parentFieldCaps.containsKey("object"))) { + // We check if the parent is an object which is indicated by field caps containing an "object" entry. + // If an object, it's not a multi field + return false; + } + return true; + } + } + + /** + * Makes boolean fields behave as a field of different type. + */ + private static final class BooleanMapper extends DocValueField { + + private static final Set TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE); + + private final T trueValue; + private final T falseValue; + + BooleanMapper(ExtractedField field, T trueValue, T falseValue) { + super(field.getName(), TYPES); + if (field.getMethod() != Method.DOC_VALUE || field.getTypes().contains(BooleanFieldMapper.CONTENT_TYPE) == false) { + throw new IllegalArgumentException("cannot apply boolean mapping to field [" + field.getName() + "]"); + } + this.trueValue = trueValue; + this.falseValue = falseValue; + } + + @Override + public Object[] value(SearchHit hit) { + DocumentField keyValue = hit.field(getName()); + if (keyValue != null) { + return keyValue.getValues().stream().map(v -> Boolean.TRUE.equals(v) ? trueValue : falseValue).toArray(); + } + return new Object[0]; + } + + @Override + public boolean supportsFromSource() { + return false; + } + + @Override + public ExtractedField newFromSource() { + throw new UnsupportedOperationException(); + } } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoPointField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoPointField.java new file mode 100644 index 0000000000000..e39ac7859c2c3 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoPointField.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Set; + +public class GeoPointField extends DocValueField { + + static final String TYPE = "geo_point"; + + private static final Set TYPES = Collections.singleton(TYPE); + + public GeoPointField(String name) { + super(name, TYPES); + } + + @Override + public Object[] value(SearchHit hit) { + Object[] value = super.value(hit); + if (value.length != 1) { + throw new IllegalStateException("Unexpected values for a geo_point field: " + Arrays.toString(value)); + } + if (value[0] instanceof String) { + value[0] = handleString((String) value[0]); + } else { + throw new IllegalStateException("Unexpected value type for a geo_point field: " + value[0].getClass()); + } + return value; + } + + private String handleString(String geoString) { + if (geoString.contains(",")) { // Entry is of the form "38.897676, -77.03653" + return geoString.replace(" ", ""); + } else { + throw new IllegalArgumentException("Unexpected value for a geo_point field: " + geoString); + } + } + + @Override + public boolean supportsFromSource() { + return false; + } + + @Override + public ExtractedField newFromSource() { + throw new UnsupportedOperationException(); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoShapeField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoShapeField.java new file mode 100644 index 0000000000000..5ff58712f6e78 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoShapeField.java @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.geometry.Geometry; +import org.elasticsearch.geometry.Point; +import org.elasticsearch.geometry.ShapeType; +import org.elasticsearch.geometry.utils.StandardValidator; +import org.elasticsearch.geometry.utils.WellKnownText; +import org.elasticsearch.search.SearchHit; + +import java.io.IOException; +import java.text.ParseException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; + +public class GeoShapeField extends SourceField { + + static final String TYPE = "geo_shape"; + + private static final Set TYPES = Collections.singleton(TYPE); + + private static final WellKnownText wkt = new WellKnownText(true, new StandardValidator(true)); + + public GeoShapeField(String name) { + super(name, TYPES); + } + + @Override + public Object[] value(SearchHit hit) { + Object[] value = super.value(hit); + if (value.length != 1) { + throw new IllegalStateException("Unexpected values for a geo_shape field: " + Arrays.toString(value)); + } + if (value[0] instanceof String) { + value[0] = handleString((String) value[0]); + } else if (value[0] instanceof Map) { + @SuppressWarnings("unchecked") + Map geoObject = (Map) value[0]; + value[0] = handleObject(geoObject); + } else { + throw new IllegalStateException("Unexpected value type for a geo_shape field: " + value[0].getClass()); + } + return value; + } + + private String handleString(String geoString) { + try { + if (geoString.startsWith("POINT")) { // Entry is of the form "POINT (-77.03653 38.897676)" + Geometry geometry = wkt.fromWKT(geoString); + if (geometry.type() != ShapeType.POINT) { + throw new IllegalArgumentException("Unexpected non-point geo_shape type: " + geometry.type().name()); + } + Point pt = ((Point)geometry); + return pt.getY() + "," + pt.getX(); + } else { + throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString); + } + } catch (IOException | ParseException ex) { + throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString); + } + } + + private String handleObject(Map geoObject) { + String geoType = (String) geoObject.get("type"); + if (geoType != null && "point".equals(geoType.toLowerCase(Locale.ROOT))) { + @SuppressWarnings("unchecked") + List coordinates = (List) geoObject.get("coordinates"); + if (coordinates == null || coordinates.size() != 2) { + throw new IllegalArgumentException("Invalid coordinates for geo_shape point: " + geoObject); + } + return coordinates.get(1) + "," + coordinates.get(0); + } else { + throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoObject); + } + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/MultiField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/MultiField.java new file mode 100644 index 0000000000000..e41905a5faddd --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/MultiField.java @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; + +import java.util.Objects; +import java.util.Set; + +public class MultiField implements ExtractedField { + + private final String name; + private final String searchField; + private final ExtractedField field; + private final String parent; + + MultiField(String parent, ExtractedField field) { + this(field.getName(), field.getSearchField(), parent, field); + } + + MultiField(String name, String searchField, String parent, ExtractedField field) { + this.name = Objects.requireNonNull(name); + this.searchField = Objects.requireNonNull(searchField); + this.field = Objects.requireNonNull(field); + this.parent = Objects.requireNonNull(parent); + } + + @Override + public String getName() { + return name; + } + + @Override + public String getSearchField() { + return searchField; + } + + @Override + public Set getTypes() { + return field.getTypes(); + } + + @Override + public Method getMethod() { + return field.getMethod(); + } + + @Override + public Object[] value(SearchHit hit) { + return field.value(hit); + } + + @Override + public boolean supportsFromSource() { + return false; + } + + @Override + public ExtractedField newFromSource() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isMultiField() { + return true; + } + + @Override + public String getParentField() { + return parent; + } + + @Override + public String getDocValueFormat() { + return field.getDocValueFormat(); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ScriptField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ScriptField.java new file mode 100644 index 0000000000000..3c06c74bc02df --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ScriptField.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; + +import java.util.Collections; + +public class ScriptField extends AbstractField { + + public ScriptField(String name) { + super(name, Collections.emptySet()); + } + + @Override + public Method getMethod() { + return Method.SCRIPT_FIELD; + } + + @Override + public Object[] value(SearchHit hit) { + return getFieldValue(hit); + } + + @Override + public boolean supportsFromSource() { + return false; + } + + @Override + public ExtractedField newFromSource() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isMultiField() { + return false; + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/SourceField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/SourceField.java new file mode 100644 index 0000000000000..f70e6e59c05c2 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/SourceField.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class SourceField extends AbstractField { + + private final String[] path; + + public SourceField(String name, Set types) { + super(name, types); + path = name.split("\\."); + } + + @Override + public Method getMethod() { + return Method.SOURCE; + } + + @Override + public Object[] value(SearchHit hit) { + Map source = hit.getSourceAsMap(); + int level = 0; + while (source != null && level < path.length - 1) { + source = getNextLevel(source, path[level]); + level++; + } + if (source != null) { + Object values = source.get(path[level]); + if (values != null) { + if (values instanceof List) { + @SuppressWarnings("unchecked") + List asList = (List) values; + return asList.toArray(new Object[0]); + } else { + return new Object[]{values}; + } + } + } + return new Object[0]; + } + + @SuppressWarnings("unchecked") + private static Map getNextLevel(Map source, String key) { + Object nextLevel = source.get(key); + if (nextLevel instanceof Map) { + return (Map) source.get(key); + } + return null; + } + + @Override + public boolean supportsFromSource() { + return true; + } + + @Override + public ExtractedField newFromSource() { + return this; + } + + @Override + public boolean isMultiField() { + return false; + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/TimeField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/TimeField.java new file mode 100644 index 0000000000000..7d8e03fab9011 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/TimeField.java @@ -0,0 +1,73 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +public class TimeField extends AbstractField { + + static final String TYPE = "date"; + + private static final Set TYPES = Collections.singleton(TYPE); + + private static final String EPOCH_MILLIS_FORMAT = "epoch_millis"; + + private final Method method; + + public TimeField(String name, Method method) { + super(name, TYPES); + if (method == Method.SOURCE) { + throw new IllegalArgumentException("time field [" + name + "] cannot be extracted from source"); + } + this.method = Objects.requireNonNull(method); + } + + @Override + public Method getMethod() { + return method; + } + + @Override + public Object[] value(SearchHit hit) { + Object[] value = getFieldValue(hit); + if (value.length != 1) { + return value; + } + if (value[0] instanceof String) { // doc_value field with the epoch_millis format + value[0] = Long.parseLong((String) value[0]); + } else if (value[0] instanceof Long == false) { // pre-6.0 field + throw new IllegalStateException("Unexpected value for a time field: " + value[0].getClass()); + } + return value; + } + + @Override + public String getDocValueFormat() { + if (method != Method.DOC_VALUE) { + throw new UnsupportedOperationException(); + } + return EPOCH_MILLIS_FORMAT; + } + + @Override + public boolean supportsFromSource() { + return false; + } + + @Override + public ExtractedField newFromSource() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isMultiField() { + return false; + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractorTests.java index 136c045d0b7be..962bd9ee6d3df 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractorTests.java @@ -32,7 +32,9 @@ import org.elasticsearch.xpack.core.ml.datafeed.DatafeedTimingStats; import org.elasticsearch.xpack.ml.datafeed.DatafeedTimingStatsReporter; import org.elasticsearch.xpack.ml.datafeed.DatafeedTimingStatsReporter.DatafeedTimingStatsPersister; +import org.elasticsearch.xpack.ml.extractor.DocValueField; import org.elasticsearch.xpack.ml.extractor.ExtractedField; +import org.elasticsearch.xpack.ml.extractor.TimeField; import org.junit.Before; import org.mockito.ArgumentCaptor; @@ -134,11 +136,9 @@ public void setUpTests() { capturedSearchRequests = new ArrayList<>(); capturedContinueScrollIds = new ArrayList<>(); jobId = "test-job"; - ExtractedField timeField = ExtractedField.newField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); + ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE); extractedFields = new TimeBasedExtractedFields(timeField, - Arrays.asList(timeField, ExtractedField.newField("field_1", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE))); + Arrays.asList(timeField, new DocValueField("field_1", Collections.singleton("keyword")))); indices = Arrays.asList("index-1", "index-2"); query = QueryBuilders.matchAllQuery(); scriptFields = Collections.emptyList(); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessorTests.java index f2070d1aea584..f8d358f36e9d1 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessorTests.java @@ -7,8 +7,10 @@ import org.elasticsearch.search.SearchHit; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.extractor.DocValueField; import org.elasticsearch.xpack.ml.extractor.ExtractedField; import org.elasticsearch.xpack.ml.extractor.ExtractedFields; +import org.elasticsearch.xpack.ml.extractor.TimeField; import org.elasticsearch.xpack.ml.test.SearchHitBuilder; import java.io.ByteArrayOutputStream; @@ -22,14 +24,10 @@ public class SearchHitToJsonProcessorTests extends ESTestCase { public void testProcessGivenSingleHit() throws IOException { - ExtractedField timeField = ExtractedField.newField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField missingField = ExtractedField.newField("missing", Collections.singleton("float"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField singleField = ExtractedField.newField("single", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField arrayField = ExtractedField.newField("array", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); + ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE); + ExtractedField missingField = new DocValueField("missing", Collections.singleton("float")); + ExtractedField singleField = new DocValueField("single", Collections.singleton("keyword")); + ExtractedField arrayField = new DocValueField("array", Collections.singleton("keyword")); TimeBasedExtractedFields extractedFields = new TimeBasedExtractedFields(timeField, Arrays.asList(timeField, missingField, singleField, arrayField)); @@ -45,14 +43,10 @@ public void testProcessGivenSingleHit() throws IOException { } public void testProcessGivenMultipleHits() throws IOException { - ExtractedField timeField = ExtractedField.newField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField missingField = ExtractedField.newField("missing", Collections.singleton("float"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField singleField = ExtractedField.newField("single", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField arrayField = ExtractedField.newField("array", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); + ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE); + ExtractedField missingField = new DocValueField("missing", Collections.singleton("float")); + ExtractedField singleField = new DocValueField("single", Collections.singleton("keyword")); + ExtractedField arrayField = new DocValueField("array", Collections.singleton("keyword")); TimeBasedExtractedFields extractedFields = new TimeBasedExtractedFields(timeField, Arrays.asList(timeField, missingField, singleField, arrayField)); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFieldsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFieldsTests.java index 34b296cf93fc5..e5958879aaebe 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFieldsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFieldsTests.java @@ -15,7 +15,12 @@ import org.elasticsearch.xpack.core.ml.job.config.DataDescription; import org.elasticsearch.xpack.core.ml.job.config.Detector; import org.elasticsearch.xpack.core.ml.job.config.Job; +import org.elasticsearch.xpack.ml.extractor.DocValueField; import org.elasticsearch.xpack.ml.extractor.ExtractedField; +import org.elasticsearch.xpack.ml.extractor.ExtractedFields; +import org.elasticsearch.xpack.ml.extractor.ScriptField; +import org.elasticsearch.xpack.ml.extractor.SourceField; +import org.elasticsearch.xpack.ml.extractor.TimeField; import org.elasticsearch.xpack.ml.test.SearchHitBuilder; import java.util.Arrays; @@ -30,8 +35,7 @@ public class TimeBasedExtractedFieldsTests extends ESTestCase { - private ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); + private ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE); public void testInvalidConstruction() { expectThrows(IllegalArgumentException.class, () -> new TimeBasedExtractedFields(timeField, Collections.emptyList())); @@ -48,18 +52,12 @@ public void testTimeFieldOnly() { } public void testAllTypesOfFields() { - ExtractedField docValue1 = ExtractedField.newField("doc1", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField docValue2 = ExtractedField.newField("doc2", Collections.singleton("float"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField scriptField1 = ExtractedField.newField("scripted1", Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD); - ExtractedField scriptField2 = ExtractedField.newField("scripted2", Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD); - ExtractedField sourceField1 = ExtractedField.newField("src1", Collections.singleton("text"), - ExtractedField.ExtractionMethod.SOURCE); - ExtractedField sourceField2 = ExtractedField.newField("src2", Collections.singleton("text"), - ExtractedField.ExtractionMethod.SOURCE); + ExtractedField docValue1 = new DocValueField("doc1", Collections.singleton("keyword")); + ExtractedField docValue2 = new DocValueField("doc2", Collections.singleton("float")); + ExtractedField scriptField1 = new ScriptField("scripted1"); + ExtractedField scriptField2 = new ScriptField("scripted2"); + ExtractedField sourceField1 = new SourceField("src1", Collections.singleton("text")); + ExtractedField sourceField2 = new SourceField("src2", Collections.singleton("text")); TimeBasedExtractedFields extractedFields = new TimeBasedExtractedFields(timeField, Arrays.asList(timeField, docValue1, docValue2, scriptField1, scriptField2, sourceField1, sourceField2)); @@ -182,12 +180,9 @@ public void testBuildGivenMultiFields() { assertThat(extractedFields.getSourceFields()[0], equalTo("airline")); assertThat(extractedFields.getAllFields().size(), equalTo(3)); - assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("time")).findFirst().get().getAlias(), - equalTo("time")); - assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airport.keyword")).findFirst().get().getAlias(), - equalTo("airport.keyword")); - assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airline")).findFirst().get().getAlias(), - equalTo("airline.text")); + assertThat(findField("time", extractedFields).getSearchField(), equalTo("time")); + assertThat(findField("airport.keyword", extractedFields).getSearchField(), equalTo("airport.keyword")); + assertThat(findField("airline.text", extractedFields).getSearchField(), equalTo("airline")); } public void testBuildGivenTimeFieldIsNotAggregatable() { @@ -253,4 +248,8 @@ private static FieldCapabilities createFieldCaps(boolean isAggregatable) { when(fieldCaps.isAggregatable()).thenReturn(isAggregatable); return fieldCaps; } + + private static ExtractedField findField(String name, ExtractedFields fields) { + return fields.getAllFields().stream().filter(f -> f.getName().equals(name)).findFirst().get(); + } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorTests.java index 0915afcc75dfb..fe9dc68755d6e 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorTests.java @@ -27,8 +27,9 @@ import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification; import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetectionTests; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression; -import org.elasticsearch.xpack.ml.extractor.ExtractedField; +import org.elasticsearch.xpack.ml.extractor.DocValueField; import org.elasticsearch.xpack.ml.extractor.ExtractedFields; +import org.elasticsearch.xpack.ml.extractor.SourceField; import org.elasticsearch.xpack.ml.test.SearchHitBuilder; import org.junit.Before; import org.mockito.ArgumentCaptor; @@ -78,8 +79,8 @@ public void setUpTests() { indices = Arrays.asList("index-1", "index-2"); query = QueryBuilders.matchAllQuery(); extractedFields = new ExtractedFields(Arrays.asList( - ExtractedField.newField("field_1", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_2", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE))); + new DocValueField("field_1", Collections.singleton("keyword")), + new DocValueField("field_2", Collections.singleton("keyword")))); scrollSize = 1000; headers = Collections.emptyMap(); @@ -295,8 +296,8 @@ public void testIncludeSourceIsFalseAndNoSourceFields() throws IOException { public void testIncludeSourceIsFalseAndAtLeastOneSourceField() throws IOException { extractedFields = new ExtractedFields(Arrays.asList( - ExtractedField.newField("field_1", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_2", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE))); + new DocValueField("field_1", Collections.singleton("keyword")), + new SourceField("field_2", Collections.singleton("text")))); TestExtractor dataExtractor = createExtractor(false, false); @@ -391,15 +392,15 @@ public void testMissingValues_GivenShouldInclude() throws IOException { public void testGetCategoricalFields() { extractedFields = new ExtractedFields(Arrays.asList( - ExtractedField.newField("field_boolean", Collections.singleton("boolean"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_float", Collections.singleton("float"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_double", Collections.singleton("double"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_byte", Collections.singleton("byte"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_short", Collections.singleton("short"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_integer", Collections.singleton("integer"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_long", Collections.singleton("long"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_keyword", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_text", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE))); + new DocValueField("field_boolean", Collections.singleton("boolean")), + new DocValueField("field_float", Collections.singleton("float")), + new DocValueField("field_double", Collections.singleton("double")), + new DocValueField("field_byte", Collections.singleton("byte")), + new DocValueField("field_short", Collections.singleton("short")), + new DocValueField("field_integer", Collections.singleton("integer")), + new DocValueField("field_long", Collections.singleton("long")), + new DocValueField("field_keyword", Collections.singleton("keyword")), + new SourceField("field_text", Collections.singleton("text")))); TestExtractor dataExtractor = createExtractor(true, true); assertThat(dataExtractor.getCategoricalFields(OutlierDetectionTests.createRandom()), empty()); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java index 053620d14cbaf..ce819e9e6d84a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java @@ -53,7 +53,7 @@ public void testDetect_GivenFloatField() { List allFields = extractedFields.getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.get(0).getName(), equalTo("some_float")); - assertThat(allFields.get(0).getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)); + assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); } public void testDetect_GivenNumericFieldWithMultipleTypes() { @@ -68,7 +68,7 @@ public void testDetect_GivenNumericFieldWithMultipleTypes() { List allFields = extractedFields.getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.get(0).getName(), equalTo("some_number")); - assertThat(allFields.get(0).getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)); + assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); } public void testDetect_GivenOutlierDetectionAndNonNumericField() { @@ -111,8 +111,8 @@ public void testDetect_GivenOutlierDetectionAndMultipleFields() { assertThat(allFields.size(), equalTo(3)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()), containsInAnyOrder("some_float", "some_long", "some_boolean")); - assertThat(allFields.stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()), - contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE))); + assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + contains(equalTo(ExtractedField.Method.DOC_VALUE))); } public void testDetect_GivenRegressionAndMultipleFields() { @@ -132,8 +132,8 @@ public void testDetect_GivenRegressionAndMultipleFields() { assertThat(allFields.size(), equalTo(5)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), containsInAnyOrder("foo", "some_float", "some_keyword", "some_long", "some_boolean")); - assertThat(allFields.stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()), - contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE))); + assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + contains(equalTo(ExtractedField.Method.DOC_VALUE))); } public void testDetect_GivenRegressionAndRequiredFieldMissing() { @@ -442,8 +442,8 @@ public void testDetect_GivenLessFieldsThanDocValuesLimit() { List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()), - contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE))); + assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + contains(equalTo(ExtractedField.Method.DOC_VALUE))); } public void testDetect_GivenEqualFieldsToDocValuesLimit() { @@ -461,8 +461,8 @@ public void testDetect_GivenEqualFieldsToDocValuesLimit() { List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()), - contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE))); + assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + contains(equalTo(ExtractedField.Method.DOC_VALUE))); } public void testDetect_GivenMoreFieldsThanDocValuesLimit() { @@ -480,8 +480,8 @@ public void testDetect_GivenMoreFieldsThanDocValuesLimit() { List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()), - contains(equalTo(ExtractedField.ExtractionMethod.SOURCE))); + assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + contains(equalTo(ExtractedField.Method.SOURCE))); } public void testDetect_GivenBooleanField_BooleanMappedAsInteger() { @@ -497,7 +497,7 @@ public void testDetect_GivenBooleanField_BooleanMappedAsInteger() { assertThat(allFields.size(), equalTo(1)); ExtractedField booleanField = allFields.get(0); assertThat(booleanField.getTypes(), contains("boolean")); - assertThat(booleanField.getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)); + assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build(); assertThat(booleanField.value(hit), arrayContaining(1)); @@ -522,7 +522,7 @@ public void testDetect_GivenBooleanField_BooleanMappedAsString() { assertThat(allFields.size(), equalTo(1)); ExtractedField booleanField = allFields.get(0); assertThat(booleanField.getTypes(), contains("boolean")); - assertThat(booleanField.getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)); + assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build(); assertThat(booleanField.value(hit), arrayContaining("true")); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/DocValueFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/DocValueFieldTests.java new file mode 100644 index 0000000000000..4cb8c4126c095 --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/DocValueFieldTests.java @@ -0,0 +1,79 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; + +import java.util.Arrays; +import java.util.Collections; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class DocValueFieldTests extends ESTestCase { + + public void testKeyword() { + SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build(); + + ExtractedField field = new DocValueField("a_keyword", Collections.singleton("keyword")); + + assertThat(field.value(hit), equalTo(new String[] { "bar" })); + assertThat(field.getName(), equalTo("a_keyword")); + assertThat(field.getSearchField(), equalTo("a_keyword")); + assertThat(field.getTypes(), contains("keyword")); + assertThat(field.getDocValueFormat(), is(nullValue())); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertThat(field.supportsFromSource(), is(true)); + assertThat(field.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> field.getParentField()); + } + + public void testKeywordArray() { + SearchHit hit = new SearchHitBuilder(42).addField("array", Arrays.asList("a", "b")).build(); + + ExtractedField field = new DocValueField("array", Collections.singleton("keyword")); + + assertThat(field.value(hit), equalTo(new String[] { "a", "b" })); + assertThat(field.getName(), equalTo("array")); + assertThat(field.getSearchField(), equalTo("array")); + assertThat(field.getTypes(), contains("keyword")); + assertThat(field.getDocValueFormat(), is(nullValue())); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertThat(field.supportsFromSource(), is(true)); + assertThat(field.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> field.getParentField()); + + ExtractedField missing = new DocValueField("missing", Collections.singleton("keyword")); + assertThat(missing.value(hit), equalTo(new Object[0])); + } + + public void testMissing() { + SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build(); + + ExtractedField missing = new DocValueField("missing", Collections.singleton("keyword")); + + assertThat(missing.value(hit), equalTo(new Object[0])); + } + + public void testNewFromSource() { + ExtractedField field = new DocValueField("foo", Collections.singleton("keyword")); + + ExtractedField fromSource = field.newFromSource(); + + assertThat(fromSource.getName(), equalTo("foo")); + assertThat(fromSource.getSearchField(), equalTo("foo")); + assertThat(fromSource.getTypes(), contains("keyword")); + expectThrows(UnsupportedOperationException.class, () -> fromSource.getDocValueFormat()); + assertThat(fromSource.getMethod(), equalTo(ExtractedField.Method.SOURCE)); + assertThat(fromSource.supportsFromSource(), is(true)); + assertThat(fromSource.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> fromSource.getParentField()); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldTests.java deleted file mode 100644 index 92d82ead05bca..0000000000000 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldTests.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.ml.extractor; - -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.ml.test.SearchHitBuilder; - -import java.util.Arrays; -import java.util.Collections; - -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.startsWith; - -public class ExtractedFieldTests extends ESTestCase { - - public void testValueGivenDocValue() { - SearchHit hit = new SearchHitBuilder(42).addField("single", "bar").addField("array", Arrays.asList("a", "b")).build(); - - ExtractedField single = ExtractedField.newField("single", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(single.value(hit), equalTo(new String[] { "bar" })); - - ExtractedField array = ExtractedField.newField("array", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(array.value(hit), equalTo(new String[] { "a", "b" })); - - ExtractedField missing = ExtractedField.newField("missing",Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(missing.value(hit), equalTo(new Object[0])); - } - - public void testValueGivenScriptField() { - SearchHit hit = new SearchHitBuilder(42).addField("single", "bar").addField("array", Arrays.asList("a", "b")).build(); - - ExtractedField single = ExtractedField.newField("single",Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD); - assertThat(single.value(hit), equalTo(new String[] { "bar" })); - - ExtractedField array = ExtractedField.newField("array", Collections.emptySet(), ExtractedField.ExtractionMethod.SCRIPT_FIELD); - assertThat(array.value(hit), equalTo(new String[] { "a", "b" })); - - ExtractedField missing = ExtractedField.newField("missing", Collections.emptySet(), ExtractedField.ExtractionMethod.SCRIPT_FIELD); - assertThat(missing.value(hit), equalTo(new Object[0])); - } - - public void testValueGivenSource() { - SearchHit hit = new SearchHitBuilder(42).setSource("{\"single\":\"bar\",\"array\":[\"a\",\"b\"]}").build(); - - ExtractedField single = ExtractedField.newField("single", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(single.value(hit), equalTo(new String[] { "bar" })); - - ExtractedField array = ExtractedField.newField("array", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(array.value(hit), equalTo(new String[] { "a", "b" })); - - ExtractedField missing = ExtractedField.newField("missing", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(missing.value(hit), equalTo(new Object[0])); - } - - public void testValueGivenNestedSource() { - SearchHit hit = new SearchHitBuilder(42).setSource("{\"level_1\":{\"level_2\":{\"foo\":\"bar\"}}}").build(); - - ExtractedField nested = ExtractedField.newField("alias", "level_1.level_2.foo", Collections.singleton("text"), - ExtractedField.ExtractionMethod.SOURCE); - assertThat(nested.value(hit), equalTo(new String[] { "bar" })); - } - - public void testGeoPoint() { - double lat = 38.897676; - double lon = -77.03653; - String[] expected = new String[] {lat + "," + lon}; - - // doc_value field - ExtractedField geo = ExtractedField.newGeoPointField("geo", "geo"); - SearchHit hit = new SearchHitBuilder(42).addField("geo", lat + ", " + lon).build(); - assertThat(geo.value(hit), equalTo(expected)); - } - - public void testGeoShape() { - double lat = 38.897676; - double lon = -77.03653; - String[] expected = new String[] {lat + "," + lon}; - // object format - SearchHit hit = new SearchHitBuilder(42) - .setSource("{\"geo\":{\"type\":\"point\", \"coordinates\": [" + lon + ", " + lat + "]}}") - .build(); - ExtractedField geo = ExtractedField.newGeoShapeField("geo", "geo"); - assertThat(geo.value(hit), equalTo(expected)); - - // WKT format - hit = new SearchHitBuilder(42).setSource("{\"geo\":\"POINT ("+ lon + " " + lat + ")\"}").build(); - geo = ExtractedField.newGeoShapeField("geo", "geo"); - assertThat(geo.value(hit), equalTo(expected)); - } - - public void testValueGivenSourceAndHitWithNoSource() { - ExtractedField missing = ExtractedField.newField("missing", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(missing.value(new SearchHitBuilder(3).build()), equalTo(new Object[0])); - } - - public void testValueGivenMismatchingMethod() { - SearchHit hit = new SearchHitBuilder(42).addField("a", 1).setSource("{\"b\":2}").build(); - - ExtractedField invalidA = ExtractedField.newField("a", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(invalidA.value(hit), equalTo(new Object[0])); - ExtractedField validA = ExtractedField.newField("a", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(validA.value(hit), equalTo(new Integer[] { 1 })); - - ExtractedField invalidB = ExtractedField.newField("b", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(invalidB.value(hit), equalTo(new Object[0])); - ExtractedField validB = ExtractedField.newField("b", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(validB.value(hit), equalTo(new Integer[] { 2 })); - } - - public void testValueGivenEmptyHit() { - SearchHit hit = new SearchHitBuilder(42).build(); - - ExtractedField docValue = ExtractedField.newField("a", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(docValue.value(hit), equalTo(new Object[0])); - - ExtractedField sourceField = ExtractedField.newField("b", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(sourceField.value(hit), equalTo(new Object[0])); - } - - public void testNewTimeFieldGivenSource() { - expectThrows(IllegalArgumentException.class, () -> ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.SOURCE)); - } - - public void testValueGivenStringTimeField() { - final long millis = randomLong(); - final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", Long.toString(millis)).build(); - final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(timeField.value(hit), equalTo(new Object[] { millis })); - } - - public void testValueGivenLongTimeField() { - final long millis = randomLong(); - final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", millis).build(); - final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(timeField.value(hit), equalTo(new Object[] { millis })); - } - - public void testValueGivenPre6xTimeField() { - // Prior to 6.x, timestamps were simply `long` milliseconds-past-the-epoch values - final long millis = randomLong(); - final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", millis).build(); - final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(timeField.value(hit), equalTo(new Object[] { millis })); - } - - public void testValueGivenUnknownFormatTimeField() { - final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", new Object()).build(); - final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(expectThrows(IllegalStateException.class, () -> timeField.value(hit)).getMessage(), - startsWith("Unexpected value for a time field")); - } - - public void testAliasVersusName() { - SearchHit hit = new SearchHitBuilder(42).addField("a", 1).addField("b", 2).build(); - - ExtractedField field = ExtractedField.newField("a", "a", Collections.singleton("int"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(field.getAlias(), equalTo("a")); - assertThat(field.getName(), equalTo("a")); - assertThat(field.value(hit), equalTo(new Integer[] { 1 })); - - hit = new SearchHitBuilder(42).addField("a", 1).addField("b", 2).build(); - - field = ExtractedField.newField("a", "b", Collections.singleton("int"), ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(field.getAlias(), equalTo("a")); - assertThat(field.getName(), equalTo("b")); - assertThat(field.value(hit), equalTo(new Integer[] { 2 })); - } - - public void testGetDocValueFormat() { - for (ExtractedField.ExtractionMethod method : ExtractedField.ExtractionMethod.values()) { - assertThat(ExtractedField.newField("f", Collections.emptySet(), method).getDocValueFormat(), equalTo(null)); - } - assertThat(ExtractedField.newTimeField("doc_value_time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE).getDocValueFormat(), equalTo("epoch_millis")); - assertThat(ExtractedField.newTimeField("source_time", Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD).getDocValueFormat(), equalTo("epoch_millis")); - } -} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldsTests.java index cee2a7154cf25..9613d14fb5f00 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldsTests.java @@ -7,12 +7,9 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; +import org.elasticsearch.search.SearchHit; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig; -import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig; -import org.elasticsearch.xpack.core.ml.job.config.DataDescription; -import org.elasticsearch.xpack.core.ml.job.config.Detector; -import org.elasticsearch.xpack.core.ml.job.config.Job; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; import java.util.Arrays; import java.util.Collections; @@ -21,24 +18,19 @@ import java.util.Map; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; public class ExtractedFieldsTests extends ESTestCase { public void testAllTypesOfFields() { - ExtractedField docValue1 = ExtractedField.newField("doc1", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField docValue2 = ExtractedField.newField("doc2", Collections.singleton("ip"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField scriptField1 = ExtractedField.newField("scripted1", Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD); - ExtractedField scriptField2 = ExtractedField.newField("scripted2", Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD); - ExtractedField sourceField1 = ExtractedField.newField("src1", Collections.singleton("text"), - ExtractedField.ExtractionMethod.SOURCE); - ExtractedField sourceField2 = ExtractedField.newField("src2", Collections.singleton("text"), - ExtractedField.ExtractionMethod.SOURCE); + ExtractedField docValue1 = new DocValueField("doc1", Collections.singleton("keyword")); + ExtractedField docValue2 = new DocValueField("doc2", Collections.singleton("ip")); + ExtractedField scriptField1 = new ScriptField("scripted1"); + ExtractedField scriptField2 = new ScriptField("scripted2"); + ExtractedField sourceField1 = new SourceField("src1", Collections.singleton("text")); + ExtractedField sourceField2 = new SourceField("src2", Collections.singleton("text")); ExtractedFields extractedFields = new ExtractedFields(Arrays.asList( docValue1, docValue2, scriptField1, scriptField2, sourceField1, sourceField2)); @@ -74,16 +66,6 @@ public void testBuildGivenMixtureOfTypes() { } public void testBuildGivenMultiFields() { - Job.Builder jobBuilder = new Job.Builder("foo"); - jobBuilder.setDataDescription(new DataDescription.Builder()); - Detector.Builder detector = new Detector.Builder("count", null); - detector.setByFieldName("airline.text"); - detector.setOverFieldName("airport.keyword"); - jobBuilder.setAnalysisConfig(new AnalysisConfig.Builder(Collections.singletonList(detector.build()))); - - DatafeedConfig.Builder datafeedBuilder = new DatafeedConfig.Builder("feed", jobBuilder.getId()); - datafeedBuilder.setIndices(Collections.singletonList("foo")); - Map text = new HashMap<>(); text.put("text", createFieldCaps(false)); Map keyword = new HashMap<>(); @@ -103,10 +85,34 @@ public void testBuildGivenMultiFields() { assertThat(extractedFields.getSourceFields()[0], equalTo("airline")); assertThat(extractedFields.getAllFields().size(), equalTo(2)); - assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airport.keyword")).findFirst().get().getAlias(), - equalTo("airport.keyword")); - assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airline")).findFirst().get().getAlias(), - equalTo("airline.text")); + ExtractedField airlineField = extractedFields.getAllFields().get(0); + assertThat(airlineField.isMultiField(), is(true)); + assertThat(airlineField.getName(), equalTo("airline.text")); + assertThat(airlineField.getSearchField(), equalTo("airline")); + assertThat(airlineField.getParentField(), equalTo("airline")); + + ExtractedField airportField = extractedFields.getAllFields().get(1); + assertThat(airportField.isMultiField(), is(true)); + assertThat(airportField.getName(), equalTo("airport.keyword")); + assertThat(airportField.getSearchField(), equalTo("airport.keyword")); + assertThat(airportField.getParentField(), equalTo("airport")); + } + + public void testApplyBooleanMapping() { + DocValueField aBool = new DocValueField("a_bool", Collections.singleton("boolean")); + + ExtractedField mapped = ExtractedFields.applyBooleanMapping(aBool, 1, 0); + + SearchHit hitTrue = new SearchHitBuilder(42).addField("a_bool", true).build(); + SearchHit hitFalse = new SearchHitBuilder(42).addField("a_bool", false).build(); + + assertThat(mapped.value(hitTrue), equalTo(new Integer[] { 1 })); + assertThat(mapped.value(hitFalse), equalTo(new Integer[] { 0 })); + + assertThat(mapped.getName(), equalTo(aBool.getName())); + assertThat(mapped.getMethod(), equalTo(aBool.getMethod())); + assertThat(mapped.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> mapped.newFromSource()); } public void testBuildGivenFieldWithoutMappings() { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoPointFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoPointFieldTests.java new file mode 100644 index 0000000000000..bd9b00254930d --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoPointFieldTests.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class GeoPointFieldTests extends ESTestCase { + + public void testGivenGeoPoint() { + double lat = 38.897676; + double lon = -77.03653; + String[] expected = new String[] {lat + "," + lon}; + SearchHit hit = new SearchHitBuilder(42).addField("geo", lat + ", " + lon).build(); + + // doc_value field + ExtractedField geo = new GeoPointField("geo"); + + assertThat(geo.value(hit), equalTo(expected)); + assertThat(geo.getName(), equalTo("geo")); + assertThat(geo.getSearchField(), equalTo("geo")); + assertThat(geo.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertThat(geo.getTypes(), contains("geo_point")); + assertThat(geo.getDocValueFormat(), is(nullValue())); + assertThat(geo.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> geo.newFromSource()); + assertThat(geo.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> geo.getParentField()); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoShapeFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoShapeFieldTests.java new file mode 100644 index 0000000000000..79ac9849375cc --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoShapeFieldTests.java @@ -0,0 +1,62 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.sameInstance; + +public class GeoShapeFieldTests extends ESTestCase { + + public void testObjectFormat() { + double lat = 38.897676; + double lon = -77.03653; + String[] expected = new String[] {lat + "," + lon}; + + SearchHit hit = new SearchHitBuilder(42) + .setSource("{\"geo\":{\"type\":\"point\", \"coordinates\": [" + lon + ", " + lat + "]}}") + .build(); + + ExtractedField geo = new GeoShapeField("geo"); + + assertThat(geo.value(hit), equalTo(expected)); + assertThat(geo.getName(), equalTo("geo")); + assertThat(geo.getSearchField(), equalTo("geo")); + assertThat(geo.getTypes(), contains("geo_shape")); + assertThat(geo.getMethod(), equalTo(ExtractedField.Method.SOURCE)); + assertThat(geo.supportsFromSource(), is(true)); + assertThat(geo.newFromSource(), sameInstance(geo)); + expectThrows(UnsupportedOperationException.class, () -> geo.getDocValueFormat()); + assertThat(geo.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> geo.getParentField()); + } + + public void testWKTFormat() { + double lat = 38.897676; + double lon = -77.03653; + String[] expected = new String[] {lat + "," + lon}; + + SearchHit hit = new SearchHitBuilder(42).setSource("{\"geo\":\"POINT ("+ lon + " " + lat + ")\"}").build(); + + ExtractedField geo = new GeoShapeField("geo"); + + assertThat(geo.value(hit), equalTo(expected)); + assertThat(geo.getName(), equalTo("geo")); + assertThat(geo.getSearchField(), equalTo("geo")); + assertThat(geo.getTypes(), contains("geo_shape")); + assertThat(geo.getMethod(), equalTo(ExtractedField.Method.SOURCE)); + assertThat(geo.supportsFromSource(), is(true)); + assertThat(geo.newFromSource(), sameInstance(geo)); + expectThrows(UnsupportedOperationException.class, () -> geo.getDocValueFormat()); + assertThat(geo.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> geo.getParentField()); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/MultiFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/MultiFieldTests.java new file mode 100644 index 0000000000000..ddbae4a3ef236 --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/MultiFieldTests.java @@ -0,0 +1,50 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; + +import java.util.Collections; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +public class MultiFieldTests extends ESTestCase { + + public void testGivenSameSearchField() { + SearchHit hit = new SearchHitBuilder(42).addField("a", 1).addField("a.b", 2).build(); + + ExtractedField wrapped = new DocValueField("a.b", Collections.singleton("integer")); + ExtractedField field = new MultiField("a", wrapped); + + assertThat(field.value(hit), equalTo(new Integer[] { 2 })); + assertThat(field.getName(), equalTo("a.b")); + assertThat(field.getSearchField(), equalTo("a.b")); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertThat(field.isMultiField(), is(true)); + assertThat(field.getParentField(), equalTo("a")); + assertThat(field.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> field.newFromSource()); + } + + public void testGivenDifferentSearchField() { + SearchHit hit = new SearchHitBuilder(42).addField("a", 1).addField("a.b", 2).build(); + + ExtractedField wrapped = new DocValueField("a", Collections.singleton("integer")); + ExtractedField field = new MultiField("a.b", "a", "a", wrapped); + + assertThat(field.value(hit), equalTo(new Integer[] { 1 })); + assertThat(field.getName(), equalTo("a.b")); + assertThat(field.getSearchField(), equalTo("a")); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertThat(field.isMultiField(), is(true)); + assertThat(field.getParentField(), equalTo("a")); + assertThat(field.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> field.newFromSource()); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ScriptFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ScriptFieldTests.java new file mode 100644 index 0000000000000..48629ec4a9fd6 --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ScriptFieldTests.java @@ -0,0 +1,64 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; + +import java.util.Arrays; +import java.util.Collections; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +public class ScriptFieldTests extends ESTestCase { + + public void testKeyword() { + SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build(); + + ExtractedField field = new ScriptField("a_keyword"); + + assertThat(field.value(hit), equalTo(new String[] { "bar" })); + assertThat(field.getName(), equalTo("a_keyword")); + assertThat(field.getSearchField(), equalTo("a_keyword")); + assertThat(field.getTypes().isEmpty(), is(true)); + expectThrows(UnsupportedOperationException.class, () -> field.getDocValueFormat()); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.SCRIPT_FIELD)); + expectThrows(UnsupportedOperationException.class, () -> field.getParentField()); + assertThat(field.isMultiField(), is(false)); + assertThat(field.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> field.newFromSource()); + } + + public void testKeywordArray() { + SearchHit hit = new SearchHitBuilder(42).addField("array", Arrays.asList("a", "b")).build(); + + ExtractedField field = new ScriptField("array"); + + assertThat(field.value(hit), equalTo(new String[] { "a", "b" })); + assertThat(field.getName(), equalTo("array")); + assertThat(field.getSearchField(), equalTo("array")); + assertThat(field.getTypes().isEmpty(), is(true)); + expectThrows(UnsupportedOperationException.class, () -> field.getDocValueFormat()); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.SCRIPT_FIELD)); + expectThrows(UnsupportedOperationException.class, () -> field.getParentField()); + assertThat(field.isMultiField(), is(false)); + assertThat(field.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> field.newFromSource()); + + ExtractedField missing = new DocValueField("missing", Collections.singleton("keyword")); + assertThat(missing.value(hit), equalTo(new Object[0])); + } + + public void testMissing() { + SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build(); + + ExtractedField missing = new ScriptField("missing"); + + assertThat(missing.value(hit), equalTo(new Object[0])); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/SourceFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/SourceFieldTests.java new file mode 100644 index 0000000000000..03fed7790a358 --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/SourceFieldTests.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; + +import java.util.Collections; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.sameInstance; + +public class SourceFieldTests extends ESTestCase { + + public void testSingleValue() { + SearchHit hit = new SearchHitBuilder(42).setSource("{\"single\":\"bar\"}").build(); + + ExtractedField field = new SourceField("single", Collections.singleton("text")); + + assertThat(field.value(hit), equalTo(new String[] { "bar" })); + assertThat(field.getName(), equalTo("single")); + assertThat(field.getSearchField(), equalTo("single")); + assertThat(field.getTypes(), contains("text")); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.SOURCE)); + assertThat(field.supportsFromSource(), is(true)); + assertThat(field.newFromSource(), sameInstance(field)); + assertThat(field.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> field.getParentField()); + expectThrows(UnsupportedOperationException.class, () -> field.getDocValueFormat()); + } + + public void testArray() { + SearchHit hit = new SearchHitBuilder(42).setSource("{\"array\":[\"a\",\"b\"]}").build(); + + ExtractedField field = new SourceField("array", Collections.singleton("text")); + + assertThat(field.value(hit), equalTo(new String[] { "a", "b" })); + assertThat(field.getName(), equalTo("array")); + assertThat(field.getSearchField(), equalTo("array")); + assertThat(field.getTypes(), contains("text")); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.SOURCE)); + assertThat(field.supportsFromSource(), is(true)); + assertThat(field.newFromSource(), sameInstance(field)); + assertThat(field.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> field.getParentField()); + expectThrows(UnsupportedOperationException.class, () -> field.getDocValueFormat()); + } + + public void testMissing() { + SearchHit hit = new SearchHitBuilder(42).setSource("{\"array\":[\"a\",\"b\"]}").build(); + + ExtractedField missing = new SourceField("missing", Collections.singleton("text")); + + assertThat(missing.value(hit), equalTo(new Object[0])); + } + + public void testValueGivenNested() { + SearchHit hit = new SearchHitBuilder(42).setSource("{\"level_1\":{\"level_2\":{\"foo\":\"bar\"}}}").build(); + + ExtractedField nested = new SourceField("level_1.level_2.foo", Collections.singleton("text")); + + assertThat(nested.value(hit), equalTo(new String[] { "bar" })); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/TimeFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/TimeFieldTests.java new file mode 100644 index 0000000000000..e2898703d3460 --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/TimeFieldTests.java @@ -0,0 +1,67 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.startsWith; + +public class TimeFieldTests extends ESTestCase { + + public void testDocValueWithStringValue() { + long millis = randomLong(); + SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", Long.toString(millis)).build(); + + ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE); + + assertThat(timeField.value(hit), equalTo(new Object[] { millis })); + assertThat(timeField.getName(), equalTo("time")); + assertThat(timeField.getSearchField(), equalTo("time")); + assertThat(timeField.getTypes(), contains("date")); + assertThat(timeField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertThat(timeField.getDocValueFormat(), equalTo("epoch_millis")); + assertThat(timeField.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> timeField.newFromSource()); + assertThat(timeField.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> timeField.getParentField()); + } + + public void testScriptWithLongValue() { + long millis = randomLong(); + SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", millis).build(); + + ExtractedField timeField = new TimeField("time", ExtractedField.Method.SCRIPT_FIELD); + + assertThat(timeField.value(hit), equalTo(new Object[] { millis })); + assertThat(timeField.getName(), equalTo("time")); + assertThat(timeField.getSearchField(), equalTo("time")); + assertThat(timeField.getTypes(), contains("date")); + assertThat(timeField.getMethod(), equalTo(ExtractedField.Method.SCRIPT_FIELD)); + expectThrows(UnsupportedOperationException.class, () -> timeField.getDocValueFormat()); + assertThat(timeField.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> timeField.newFromSource()); + assertThat(timeField.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> timeField.getParentField()); + } + + public void testUnknownFormat() { + final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", new Object()).build(); + + final ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE); + + assertThat(expectThrows(IllegalStateException.class, () -> timeField.value(hit)).getMessage(), + startsWith("Unexpected value for a time field")); + } + + public void testSourceNotSupported() { + expectThrows(IllegalArgumentException.class, () -> new TimeField("foo", ExtractedField.Method.SOURCE)); + } +}