diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobsRestIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobsRestIT.java index 114f1ecd0aa83..eca06e90344b6 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobsRestIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobsRestIT.java @@ -55,7 +55,7 @@ protected boolean preserveTemplatesUponCompletion() { return true; } - private void setupDataAccessRole(String index) throws IOException { + private static void setupDataAccessRole(String index) throws IOException { Request request = new Request("PUT", "/_security/role/test_data_access"); request.setJsonEntity("{" + " \"indices\" : [" @@ -283,10 +283,12 @@ public void testLookbackOnlyWithSourceDisabled() throws Exception { new LookbackOnlyTestHelper("test-lookback-only-with-source-disabled", "airline-data-disabled-source").execute(); } - @AwaitsFix(bugUrl = "This test uses painless which is not available in the integTest phase") public void testLookbackOnlyWithScriptFields() throws Exception { - new LookbackOnlyTestHelper("test-lookback-only-with-script-fields", "airline-data-disabled-source") - .setAddScriptedFields(true).execute(); + new LookbackOnlyTestHelper("test-lookback-only-with-script-fields", "airline-data") + .setScriptedFields( + "{\"scripted_airline\":{\"script\":{\"lang\":\"painless\",\"source\":\"doc['airline.keyword'].value\"}}}") + .setAirlineVariant("scripted_airline") + .execute(); } public void testLookbackOnlyWithNestedFields() throws Exception { @@ -1088,7 +1090,7 @@ private class LookbackOnlyTestHelper { private String jobId; private String airlineVariant; private String dataIndex; - private boolean addScriptedFields; + private String scriptedFields; private boolean 
shouldSucceedInput; private boolean shouldSucceedProcessing; @@ -1100,8 +1102,8 @@ private class LookbackOnlyTestHelper { this.airlineVariant = "airline"; } - public LookbackOnlyTestHelper setAddScriptedFields(boolean value) { - addScriptedFields = value; + public LookbackOnlyTestHelper setScriptedFields(String scriptFields) { + this.scriptedFields = scriptFields; return this; } @@ -1124,10 +1126,7 @@ public LookbackOnlyTestHelper setShouldSucceedProcessing(boolean value) { public void execute() throws Exception { createJob(jobId, airlineVariant); String datafeedId = "datafeed-" + jobId; - new DatafeedBuilder(datafeedId, jobId, dataIndex) - .setScriptedFields(addScriptedFields ? - "{\"airline\":{\"script\":{\"lang\":\"painless\",\"inline\":\"doc['airline'].value\"}}}" : null) - .build(); + new DatafeedBuilder(datafeedId, jobId, dataIndex).setScriptedFields(scriptedFields).build(); openJob(client(), jobId); startDatafeedAndWaitUntilStopped(datafeedId); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractor.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractor.java index 0621dc32fc9a8..e0bf14b1bb1b3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractor.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractor.java @@ -130,7 +130,7 @@ private SearchRequestBuilder buildSearchRequest(long start) { context.query, context.extractedFields.timeField(), start, context.end)); for (ExtractedField docValueField : context.extractedFields.getDocValueFields()) { - searchRequestBuilder.addDocValueField(docValueField.getName(), docValueField.getDocValueFormat()); + searchRequestBuilder.addDocValueField(docValueField.getSearchField(), docValueField.getDocValueFormat()); } String[] sourceFields = context.extractedFields.getSourceFields(); if 
(sourceFields.length == 0) { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessor.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessor.java index 8838954a149b4..357ac8ee900af 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessor.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessor.java @@ -29,7 +29,7 @@ class SearchHitToJsonProcessor implements Releasable { public void process(SearchHit hit) throws IOException { jsonBuilder.startObject(); for (ExtractedField field : fields.getAllFields()) { - writeKeyValue(field.getAlias(), field.value(hit)); + writeKeyValue(field.getName(), field.value(hit)); } jsonBuilder.endObject(); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFields.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFields.java index cd058b32c6f04..f46cc9f216fc3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFields.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFields.java @@ -14,7 +14,6 @@ import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.Set; @@ -42,13 +41,13 @@ public String timeField() { public Long timeFieldValue(SearchHit hit) { Object[] value = timeField.value(hit); if (value.length != 1) { - throw new RuntimeException("Time field [" + timeField.getAlias() + "] expected a single value; actual was: " + throw new RuntimeException("Time field [" + timeField.getName() + "] expected a single value; actual was: " + Arrays.toString(value)); 
} if (value[0] instanceof Long) { return (Long) value[0]; } - throw new RuntimeException("Time field [" + timeField.getAlias() + "] expected a long value; actual was: " + value[0]); + throw new RuntimeException("Time field [" + timeField.getName() + "] expected a long value; actual was: " + value[0]); } public static TimeBasedExtractedFields build(Job job, DatafeedConfig datafeed, FieldCapabilitiesResponse fieldsCapabilities) { @@ -58,20 +57,18 @@ public static TimeBasedExtractedFields build(Job job, DatafeedConfig datafeed, F if (scriptFields.contains(timeField) == false && extractionMethodDetector.isAggregatable(timeField) == false) { throw new IllegalArgumentException("cannot retrieve time field [" + timeField + "] because it is not aggregatable"); } - ExtractedField timeExtractedField = extractedTimeField(timeField, scriptFields, fieldsCapabilities); + ExtractedField timeExtractedField = extractedTimeField(timeField, scriptFields); List remainingFields = job.allInputFields().stream().filter(f -> !f.equals(timeField)).collect(Collectors.toList()); List allExtractedFields = new ArrayList<>(remainingFields.size() + 1); allExtractedFields.add(timeExtractedField); remainingFields.stream().forEach(field -> allExtractedFields.add(extractionMethodDetector.detect(field))); + return new TimeBasedExtractedFields(timeExtractedField, allExtractedFields); } - private static ExtractedField extractedTimeField(String timeField, Set scriptFields, - FieldCapabilitiesResponse fieldCapabilities) { - if (scriptFields.contains(timeField)) { - return ExtractedField.newTimeField(timeField, Collections.emptySet(), ExtractedField.ExtractionMethod.SCRIPT_FIELD); - } - return ExtractedField.newTimeField(timeField, fieldCapabilities.getField(timeField).keySet(), - ExtractedField.ExtractionMethod.DOC_VALUE); + private static ExtractedField extractedTimeField(String timeField, Set scriptFields) { + ExtractedField.Method method = scriptFields.contains(timeField) ? 
ExtractedField.Method.SCRIPT_FIELD + : ExtractedField.Method.DOC_VALUE; + return ExtractedFields.newTimeField(timeField, method); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractor.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractor.java index 46857593fc488..41b8f8293fcc5 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractor.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractor.java @@ -24,8 +24,8 @@ import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.xpack.core.ClientHelper; import org.elasticsearch.xpack.core.ml.dataframe.analyses.DataFrameAnalysis; -import org.elasticsearch.xpack.ml.extractor.ExtractedField; import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex; +import org.elasticsearch.xpack.ml.extractor.ExtractedField; import java.io.IOException; import java.util.ArrayList; @@ -138,7 +138,7 @@ private SearchRequestBuilder buildSearchRequest() { setFetchSource(searchRequestBuilder); for (ExtractedField docValueField : context.extractedFields.getDocValueFields()) { - searchRequestBuilder.addDocValueField(docValueField.getName(), docValueField.getDocValueFormat()); + searchRequestBuilder.addDocValueField(docValueField.getSearchField(), docValueField.getDocValueFormat()); } return searchRequestBuilder; @@ -231,7 +231,7 @@ private void clearScroll(String scrollId) { } public List getFieldNames() { - return context.extractedFields.getAllFields().stream().map(ExtractedField::getAlias).collect(Collectors.toList()); + return context.extractedFields.getAllFields().stream().map(ExtractedField::getName).collect(Collectors.toList()); } public DataSummary collectDataSummary() { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java 
b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java index 1615f2fc8b1f9..6ed1ea62fe8a4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java @@ -11,11 +11,9 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.document.DocumentField; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.BooleanFieldMapper; -import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest; @@ -24,9 +22,9 @@ import org.elasticsearch.xpack.core.ml.job.messages.Messages; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import org.elasticsearch.xpack.core.ml.utils.NameResolver; +import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex; import org.elasticsearch.xpack.ml.extractor.ExtractedField; import org.elasticsearch.xpack.ml.extractor.ExtractedFields; -import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex; import java.util.ArrayList; import java.util.Arrays; @@ -264,13 +262,13 @@ private ExtractedFields fetchBooleanFieldsAsIntegers(ExtractedFields extractedFi List adjusted = new ArrayList<>(extractedFields.getAllFields().size()); for (ExtractedField field : extractedFields.getAllFields()) { if (isBoolean(field.getTypes())) { - if (config.getAnalysis().getAllowedCategoricalTypes(field.getAlias()).contains(BooleanFieldMapper.CONTENT_TYPE)) { + if 
(config.getAnalysis().getAllowedCategoricalTypes(field.getName()).contains(BooleanFieldMapper.CONTENT_TYPE)) { // We convert boolean field to string if it is a categorical dependent variable - adjusted.add(new BooleanMapper<>(field, Boolean.TRUE.toString(), Boolean.FALSE.toString())); + adjusted.add(ExtractedFields.applyBooleanMapping(field, Boolean.TRUE.toString(), Boolean.FALSE.toString())); } else { // We convert boolean fields to integers with values 0, 1 as this is the preferred // way to consume such features in the analytics process. - adjusted.add(new BooleanMapper<>(field, 1, 0)); + adjusted.add(ExtractedFields.applyBooleanMapping(field, 1, 0)); } } else { adjusted.add(field); @@ -282,33 +280,4 @@ private ExtractedFields fetchBooleanFieldsAsIntegers(ExtractedFields extractedFi private static boolean isBoolean(Set types) { return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE); } - - /** - * {@link BooleanMapper} makes boolean field behave as a field of different type. - */ - private static final class BooleanMapper extends ExtractedField { - - private final T trueValue; - private final T falseValue; - - BooleanMapper(ExtractedField field, T trueValue, T falseValue) { - super(field.getAlias(), field.getName(), Collections.singleton(BooleanFieldMapper.CONTENT_TYPE), ExtractionMethod.DOC_VALUE); - this.trueValue = trueValue; - this.falseValue = falseValue; - } - - @Override - public Object[] value(SearchHit hit) { - DocumentField keyValue = hit.field(name); - if (keyValue != null) { - return keyValue.getValues().stream().map(v -> Boolean.TRUE.equals(v) ? 
trueValue : falseValue).toArray(); - } - return new Object[0]; - } - - @Override - public boolean supportsFromSource() { - return false; - } - } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/AbstractField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/AbstractField.java new file mode 100644 index 0000000000000..86ac35a614df5 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/AbstractField.java @@ -0,0 +1,49 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.common.document.DocumentField; +import org.elasticsearch.search.SearchHit; + +import java.util.List; +import java.util.Objects; +import java.util.Set; + +abstract class AbstractField implements ExtractedField { + + private final String name; + + private final Set types; + + AbstractField(String name, Set types) { + this.name = Objects.requireNonNull(name); + this.types = Objects.requireNonNull(types); + } + + @Override + public String getName() { + return name; + } + + @Override + public String getSearchField() { + return name; + } + + @Override + public Set getTypes() { + return types; + } + + protected Object[] getFieldValue(SearchHit hit) { + DocumentField keyValue = hit.field(getSearchField()); + if (keyValue != null) { + List values = keyValue.getValues(); + return values.toArray(new Object[0]); + } + return new Object[0]; + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/DocValueField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/DocValueField.java new file mode 100644 index 0000000000000..b8683fe120e35 --- /dev/null +++ 
b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/DocValueField.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.search.SearchHit; + +import java.util.Set; + +public class DocValueField extends AbstractField { + + public DocValueField(String name, Set types) { + super(name, types); + } + + @Override + public Method getMethod() { + return Method.DOC_VALUE; + } + + @Override + public Object[] value(SearchHit hit) { + return getFieldValue(hit); + } + + @Override + public boolean supportsFromSource() { + return true; + } + + @Override + public ExtractedField newFromSource() { + return new SourceField(getSearchField(), getTypes()); + } + + @Override + public boolean isMultiField() { + return false; + } + + @Nullable + public String getDocValueFormat() { + return null; + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedField.java index c47b77d2ddc11..60a1f4883d120 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedField.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedField.java @@ -5,305 +5,77 @@ */ package org.elasticsearch.xpack.ml.extractor; -import org.elasticsearch.common.document.DocumentField; -import org.elasticsearch.geometry.Geometry; -import org.elasticsearch.geometry.Point; -import org.elasticsearch.geometry.ShapeType; -import org.elasticsearch.geometry.utils.StandardValidator; -import org.elasticsearch.geometry.utils.WellKnownText; import org.elasticsearch.search.SearchHit; -import 
java.io.IOException; -import java.text.ParseException; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; import java.util.Set; /** - * Represents a field to be extracted by the datafeed. - * It encapsulates the extraction logic. + * Describes how to extract an analyzed field */ -public abstract class ExtractedField { +public interface ExtractedField { - public enum ExtractionMethod { + enum Method { SOURCE, DOC_VALUE, SCRIPT_FIELD } - /** The name of the field as configured in the job */ - protected final String alias; - - /** The name of the field we extract */ - protected final String name; - - private final Set types; - - private final ExtractionMethod extractionMethod; - - protected ExtractedField(String alias, String name, Set types, ExtractionMethod extractionMethod) { - this.alias = Objects.requireNonNull(alias); - this.name = Objects.requireNonNull(name); - this.types = Objects.requireNonNull(types); - this.extractionMethod = Objects.requireNonNull(extractionMethod); - } - - public String getAlias() { - return alias; - } - - public String getName() { - return name; - } - - public Set getTypes() { - return types; - } - - public ExtractionMethod getExtractionMethod() { - return extractionMethod; - } - - public abstract Object[] value(SearchHit hit); - - public abstract boolean supportsFromSource(); - - public String getDocValueFormat() { - return null; - } - - public static ExtractedField newTimeField(String name, Set types, ExtractionMethod extractionMethod) { - if (extractionMethod == ExtractionMethod.SOURCE) { - throw new IllegalArgumentException("time field cannot be extracted from source"); - } - return new TimeField(name, types, extractionMethod); - } - - public static ExtractedField newGeoShapeField(String alias, String name) { - return new GeoShapeField(alias, name, Collections.singleton("geo_shape")); - } - - public static ExtractedField 
newGeoPointField(String alias, String name) { - return new GeoPointField(alias, name, Collections.singleton("geo_point")); - } - - public static ExtractedField newField(String name, Set types, ExtractionMethod extractionMethod) { - return newField(name, name, types, extractionMethod); - } - - public static ExtractedField newField(String alias, String name, Set types, ExtractionMethod extractionMethod) { - switch (extractionMethod) { - case DOC_VALUE: - case SCRIPT_FIELD: - return new FromFields(alias, name, types, extractionMethod); - case SOURCE: - return new FromSource(alias, name, types); - default: - throw new IllegalArgumentException("Invalid extraction method [" + extractionMethod + "]"); - } - } - - public ExtractedField newFromSource() { - if (supportsFromSource()) { - return new FromSource(alias, name, types); - } - throw new IllegalStateException("Field (alias [" + alias + "], name [" + name + "]) should be extracted via [" - + extractionMethod + "] and cannot be extracted from source"); - } - - private static class FromFields extends ExtractedField { - - FromFields(String alias, String name, Set types, ExtractionMethod extractionMethod) { - super(alias, name, types, extractionMethod); - } - - @Override - public Object[] value(SearchHit hit) { - DocumentField keyValue = hit.field(name); - if (keyValue != null) { - List values = keyValue.getValues(); - return values.toArray(new Object[0]); - } - return new Object[0]; - } - - @Override - public boolean supportsFromSource() { - return getExtractionMethod() == ExtractionMethod.DOC_VALUE; - } - } - - private static class GeoShapeField extends FromSource { - private static final WellKnownText wkt = new WellKnownText(true, new StandardValidator(true)); - - GeoShapeField(String alias, String name, Set types) { - super(alias, name, types); - } - - @Override - public Object[] value(SearchHit hit) { - Object[] value = super.value(hit); - if (value.length != 1) { - throw new IllegalStateException("Unexpected values 
for a geo_shape field: " + Arrays.toString(value)); - } - if (value[0] instanceof String) { - value[0] = handleString((String) value[0]); - } else if (value[0] instanceof Map) { - @SuppressWarnings("unchecked") - Map geoObject = (Map) value[0]; - value[0] = handleObject(geoObject); - } else { - throw new IllegalStateException("Unexpected value type for a geo_shape field: " + value[0].getClass()); - } - return value; - } - - private String handleString(String geoString) { - try { - if (geoString.startsWith("POINT")) { // Entry is of the form "POINT (-77.03653 38.897676)" - Geometry geometry = wkt.fromWKT(geoString); - if (geometry.type() != ShapeType.POINT) { - throw new IllegalArgumentException("Unexpected non-point geo_shape type: " + geometry.type().name()); - } - Point pt = ((Point)geometry); - return pt.getY() + "," + pt.getX(); - } else { - throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString); - } - } catch (IOException | ParseException ex) { - throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString); - } - } - - private String handleObject(Map geoObject) { - String geoType = (String) geoObject.get("type"); - if (geoType != null && "point".equals(geoType.toLowerCase(Locale.ROOT))) { - @SuppressWarnings("unchecked") - List coordinates = (List) geoObject.get("coordinates"); - if (coordinates == null || coordinates.size() != 2) { - throw new IllegalArgumentException("Invalid coordinates for geo_shape point: " + geoObject); - } - return coordinates.get(1) + "," + coordinates.get(0); - } else { - throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoObject); - } - } - - } - - private static class GeoPointField extends FromFields { - - GeoPointField(String alias, String name, Set types) { - super(alias, name, types, ExtractionMethod.DOC_VALUE); - } - - @Override - public Object[] value(SearchHit hit) { - Object[] value = super.value(hit); - if (value.length != 1) { - 
throw new IllegalStateException("Unexpected values for a geo_point field: " + Arrays.toString(value)); - } - if (value[0] instanceof String) { - value[0] = handleString((String) value[0]); - } else { - throw new IllegalStateException("Unexpected value type for a geo_point field: " + value[0].getClass()); - } - return value; - } - - private String handleString(String geoString) { - if (geoString.contains(",")) { // Entry is of the form "38.897676, -77.03653" - return geoString.replace(" ", ""); - } else { - throw new IllegalArgumentException("Unexpected value for a geo_point field: " + geoString); - } - } - - @Override - public boolean supportsFromSource() { - return false; - } - } - - private static class TimeField extends FromFields { - - private static final String EPOCH_MILLIS_FORMAT = "epoch_millis"; - - TimeField(String name, Set types, ExtractionMethod extractionMethod) { - super(name, name, types, extractionMethod); - } - - @Override - public Object[] value(SearchHit hit) { - Object[] value = super.value(hit); - if (value.length != 1) { - return value; - } - if (value[0] instanceof String) { // doc_value field with the epoch_millis format - value[0] = Long.parseLong((String) value[0]); - } else if (value[0] instanceof Long == false) { // pre-6.0 field - throw new IllegalStateException("Unexpected value for a time field: " + value[0].getClass()); - } - return value; - } - - @Override - public String getDocValueFormat() { - return EPOCH_MILLIS_FORMAT; - } - - @Override - public boolean supportsFromSource() { - return false; - } - } - - private static class FromSource extends ExtractedField { - - private String[] namePath; - - FromSource(String alias, String name, Set types) { - super(alias, name, types, ExtractionMethod.SOURCE); - namePath = name.split("\\."); - } - - @Override - public Object[] value(SearchHit hit) { - Map source = hit.getSourceAsMap(); - int level = 0; - while (source != null && level < namePath.length - 1) { - source = getNextLevel(source, 
namePath[level]); - level++; - } - if (source != null) { - Object values = source.get(namePath[level]); - if (values != null) { - if (values instanceof List) { - @SuppressWarnings("unchecked") - List asList = (List) values; - return asList.toArray(new Object[0]); - } else { - return new Object[]{values}; - } - } - } - return new Object[0]; - } - - @Override - public boolean supportsFromSource() { - return true; - } - - @SuppressWarnings("unchecked") - private static Map getNextLevel(Map source, String key) { - Object nextLevel = source.get(key); - if (nextLevel instanceof Map) { - return (Map) source.get(key); - } - return null; - } + /** + * @return The name of the field as expected by the user + */ + String getName(); + + /** + * This is the name of the field we should search for. + * In most cases this is the same as {@link #getName()}. + * However, if the field is a non-aggregatable multi-field + * we cannot retrieve it from source. Thus we search for + * its parent instead. + * @return The name of the field that is searched. 
+ */ + String getSearchField(); + + /** + * @return The field types + */ + Set getTypes(); + + /** + * @return The extraction {@link Method} + */ + Method getMethod(); + + /** + * Extracts the value from a {@link SearchHit} + * @param hit the search hit + * @return the extracted value + */ + Object[] value(SearchHit hit); + + /** + * @return Whether the field can be fetched from source instead + */ + boolean supportsFromSource(); + + /** + * @return A new extraction field that's fetching from source + */ + ExtractedField newFromSource(); + + /** + * @return Whether it is a multi-field + */ + boolean isMultiField(); + + /** + * @return The multi-field parent + */ + default String getParentField() { + throw new UnsupportedOperationException(); + } + + /** + * @return The doc_value format + */ + default String getDocValueFormat() { + throw new UnsupportedOperationException(); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java index a5f5b695172a9..9fe079b745c10 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ExtractedFields.java @@ -7,6 +7,9 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; +import org.elasticsearch.common.document.DocumentField; +import org.elasticsearch.index.mapper.BooleanFieldMapper; +import org.elasticsearch.search.SearchHit; import org.elasticsearch.xpack.core.ml.utils.MlStrings; import java.util.Collection; @@ -22,16 +25,14 @@ */ public class ExtractedFields { - private static final String TEXT = "text"; - private final List allFields; private final List docValueFields; private final String[] sourceFields; public ExtractedFields(List allFields) { this.allFields = 
Collections.unmodifiableList(allFields); - this.docValueFields = filterFields(ExtractedField.ExtractionMethod.DOC_VALUE, allFields); - this.sourceFields = filterFields(ExtractedField.ExtractionMethod.SOURCE, allFields).stream().map(ExtractedField::getName) + this.docValueFields = filterFields(ExtractedField.Method.DOC_VALUE, allFields); + this.sourceFields = filterFields(ExtractedField.Method.SOURCE, allFields).stream().map(ExtractedField::getSearchField) .toArray(String[]::new); } @@ -47,8 +48,8 @@ public List getDocValueFields() { return docValueFields; } - private static List filterFields(ExtractedField.ExtractionMethod method, List fields) { - return fields.stream().filter(field -> field.getExtractionMethod() == method).collect(Collectors.toList()); + private static List filterFields(ExtractedField.Method method, List fields) { + return fields.stream().filter(field -> field.getMethod() == method).collect(Collectors.toList()); } public static ExtractedFields build(Collection allFields, Set scriptFields, @@ -57,6 +58,14 @@ public static ExtractedFields build(Collection allFields, Set sc return new ExtractedFields(allFields.stream().map(field -> extractionMethodDetector.detect(field)).collect(Collectors.toList())); } + public static TimeField newTimeField(String name, ExtractedField.Method method) { + return new TimeField(name, method); + } + + public static ExtractedField applyBooleanMapping(ExtractedField field, T trueValue, T falseValue) { + return new BooleanMapper<>(field, trueValue, falseValue); + } + public static class ExtractionMethodDetector { private final Set scriptFields; @@ -68,38 +77,37 @@ public ExtractionMethodDetector(Set scriptFields, FieldCapabilitiesRespo } public ExtractedField detect(String field) { - String internalField = field; - ExtractedField.ExtractionMethod method = ExtractedField.ExtractionMethod.SOURCE; - Set types = getTypes(field); if (scriptFields.contains(field)) { - method = ExtractedField.ExtractionMethod.SCRIPT_FIELD; - } 
else if (isAggregatable(field)) { - method = ExtractedField.ExtractionMethod.DOC_VALUE; - if (isFieldOfType(field, "date")) { - return ExtractedField.newTimeField(field, types, method); - } - } else if (isFieldOfType(field, TEXT)) { - String parentField = MlStrings.getParentField(field); - // Field is text so check if it is a multi-field - if (Objects.equals(parentField, field) == false && fieldsCapabilities.getField(parentField) != null) { - // Field is a multi-field which means it won't be available in source. Let's take the parent instead. - internalField = parentField; - method = isAggregatable(parentField) ? ExtractedField.ExtractionMethod.DOC_VALUE - : ExtractedField.ExtractionMethod.SOURCE; + return new ScriptField(field); + } + ExtractedField extractedField = detectNonScriptField(field); + String parentField = MlStrings.getParentField(field); + if (isMultiField(field, parentField)) { + if (isAggregatable(field)) { + return new MultiField(parentField, extractedField); + } else { + ExtractedField parentExtractionField = detectNonScriptField(parentField); + return new MultiField(field, parentField, parentField, parentExtractionField); } } + return extractedField; + } - if (isFieldOfType(field, "geo_point")) { - if (method != ExtractedField.ExtractionMethod.DOC_VALUE) { + private ExtractedField detectNonScriptField(String field) { + if (isFieldOfType(field, TimeField.TYPE) && isAggregatable(field)) { + return new TimeField(field, ExtractedField.Method.DOC_VALUE); + } + if (isFieldOfType(field, GeoPointField.TYPE)) { + if (isAggregatable(field) == false) { throw new IllegalArgumentException("cannot use [geo_point] field with disabled doc values"); } - return ExtractedField.newGeoPointField(field, internalField); + return new GeoPointField(field); } - if (isFieldOfType(field, "geo_shape")) { - return ExtractedField.newGeoShapeField(field, internalField); + if (isFieldOfType(field, GeoShapeField.TYPE)) { + return new GeoShapeField(field); } - - return 
ExtractedField.newField(field, internalField, types, method); + Set types = getTypes(field); + return isAggregatable(field) ? new DocValueField(field, types) : new SourceField(field, types); } private Set getTypes(String field) { @@ -127,5 +135,57 @@ private boolean isFieldOfType(String field, String type) { } return false; } + + private boolean isMultiField(String field, String parent) { + if (Objects.equals(field, parent)) { + return false; + } + Map parentFieldCaps = fieldsCapabilities.getField(parent); + if (parentFieldCaps == null || (parentFieldCaps.size() == 1 && parentFieldCaps.containsKey("object"))) { + // We check if the parent is an object which is indicated by field caps containing an "object" entry. + // If an object, it's not a multi field + return false; + } + return true; + } + } + + /** + * Makes boolean fields behave as a field of different type. + */ + private static final class BooleanMapper extends DocValueField { + + private static final Set TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE); + + private final T trueValue; + private final T falseValue; + + BooleanMapper(ExtractedField field, T trueValue, T falseValue) { + super(field.getName(), TYPES); + if (field.getMethod() != Method.DOC_VALUE || field.getTypes().contains(BooleanFieldMapper.CONTENT_TYPE) == false) { + throw new IllegalArgumentException("cannot apply boolean mapping to field [" + field.getName() + "]"); + } + this.trueValue = trueValue; + this.falseValue = falseValue; + } + + @Override + public Object[] value(SearchHit hit) { + DocumentField keyValue = hit.field(getName()); + if (keyValue != null) { + return keyValue.getValues().stream().map(v -> Boolean.TRUE.equals(v) ? 
trueValue : falseValue).toArray(); + } + return new Object[0]; + } + + @Override + public boolean supportsFromSource() { + return false; + } + + @Override + public ExtractedField newFromSource() { + throw new UnsupportedOperationException(); + } } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoPointField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoPointField.java new file mode 100644 index 0000000000000..e39ac7859c2c3 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoPointField.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; + +import java.util.Arrays; +import java.util.Collections; +import java.util.Set; + +public class GeoPointField extends DocValueField { + + static final String TYPE = "geo_point"; + + private static final Set TYPES = Collections.singleton(TYPE); + + public GeoPointField(String name) { + super(name, TYPES); + } + + @Override + public Object[] value(SearchHit hit) { + Object[] value = super.value(hit); + if (value.length != 1) { + throw new IllegalStateException("Unexpected values for a geo_point field: " + Arrays.toString(value)); + } + if (value[0] instanceof String) { + value[0] = handleString((String) value[0]); + } else { + throw new IllegalStateException("Unexpected value type for a geo_point field: " + value[0].getClass()); + } + return value; + } + + private String handleString(String geoString) { + if (geoString.contains(",")) { // Entry is of the form "38.897676, -77.03653" + return geoString.replace(" ", ""); + } else { + throw new IllegalArgumentException("Unexpected value for a geo_point field: " + geoString); + } 
+ } + + @Override + public boolean supportsFromSource() { + return false; + } + + @Override + public ExtractedField newFromSource() { + throw new UnsupportedOperationException(); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoShapeField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoShapeField.java new file mode 100644 index 0000000000000..5ff58712f6e78 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/GeoShapeField.java @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.geometry.Geometry; +import org.elasticsearch.geometry.Point; +import org.elasticsearch.geometry.ShapeType; +import org.elasticsearch.geometry.utils.StandardValidator; +import org.elasticsearch.geometry.utils.WellKnownText; +import org.elasticsearch.search.SearchHit; + +import java.io.IOException; +import java.text.ParseException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; + +public class GeoShapeField extends SourceField { + + static final String TYPE = "geo_shape"; + + private static final Set TYPES = Collections.singleton(TYPE); + + private static final WellKnownText wkt = new WellKnownText(true, new StandardValidator(true)); + + public GeoShapeField(String name) { + super(name, TYPES); + } + + @Override + public Object[] value(SearchHit hit) { + Object[] value = super.value(hit); + if (value.length != 1) { + throw new IllegalStateException("Unexpected values for a geo_shape field: " + Arrays.toString(value)); + } + if (value[0] instanceof String) { + value[0] = handleString((String) value[0]); 
+ } else if (value[0] instanceof Map) { + @SuppressWarnings("unchecked") + Map geoObject = (Map) value[0]; + value[0] = handleObject(geoObject); + } else { + throw new IllegalStateException("Unexpected value type for a geo_shape field: " + value[0].getClass()); + } + return value; + } + + private String handleString(String geoString) { + try { + if (geoString.startsWith("POINT")) { // Entry is of the form "POINT (-77.03653 38.897676)" + Geometry geometry = wkt.fromWKT(geoString); + if (geometry.type() != ShapeType.POINT) { + throw new IllegalArgumentException("Unexpected non-point geo_shape type: " + geometry.type().name()); + } + Point pt = ((Point)geometry); + return pt.getY() + "," + pt.getX(); + } else { + throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString); + } + } catch (IOException | ParseException ex) { + throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString); + } + } + + private String handleObject(Map geoObject) { + String geoType = (String) geoObject.get("type"); + if (geoType != null && "point".equals(geoType.toLowerCase(Locale.ROOT))) { + @SuppressWarnings("unchecked") + List coordinates = (List) geoObject.get("coordinates"); + if (coordinates == null || coordinates.size() != 2) { + throw new IllegalArgumentException("Invalid coordinates for geo_shape point: " + geoObject); + } + return coordinates.get(1) + "," + coordinates.get(0); + } else { + throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoObject); + } + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/MultiField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/MultiField.java new file mode 100644 index 0000000000000..e41905a5faddd --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/MultiField.java @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; + +import java.util.Objects; +import java.util.Set; + +public class MultiField implements ExtractedField { + + private final String name; + private final String searchField; + private final ExtractedField field; + private final String parent; + + MultiField(String parent, ExtractedField field) { + this(field.getName(), field.getSearchField(), parent, field); + } + + MultiField(String name, String searchField, String parent, ExtractedField field) { + this.name = Objects.requireNonNull(name); + this.searchField = Objects.requireNonNull(searchField); + this.field = Objects.requireNonNull(field); + this.parent = Objects.requireNonNull(parent); + } + + @Override + public String getName() { + return name; + } + + @Override + public String getSearchField() { + return searchField; + } + + @Override + public Set getTypes() { + return field.getTypes(); + } + + @Override + public Method getMethod() { + return field.getMethod(); + } + + @Override + public Object[] value(SearchHit hit) { + return field.value(hit); + } + + @Override + public boolean supportsFromSource() { + return false; + } + + @Override + public ExtractedField newFromSource() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isMultiField() { + return true; + } + + @Override + public String getParentField() { + return parent; + } + + @Override + public String getDocValueFormat() { + return field.getDocValueFormat(); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ScriptField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ScriptField.java new file mode 100644 index 0000000000000..3c06c74bc02df --- /dev/null +++ 
b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/ScriptField.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; + +import java.util.Collections; + +public class ScriptField extends AbstractField { + + public ScriptField(String name) { + super(name, Collections.emptySet()); + } + + @Override + public Method getMethod() { + return Method.SCRIPT_FIELD; + } + + @Override + public Object[] value(SearchHit hit) { + return getFieldValue(hit); + } + + @Override + public boolean supportsFromSource() { + return false; + } + + @Override + public ExtractedField newFromSource() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isMultiField() { + return false; + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/SourceField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/SourceField.java new file mode 100644 index 0000000000000..f70e6e59c05c2 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/SourceField.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class SourceField extends AbstractField { + + private final String[] path; + + public SourceField(String name, Set types) { + super(name, types); + path = name.split("\\."); + } + + @Override + public Method getMethod() { + return Method.SOURCE; + } + + @Override + public Object[] value(SearchHit hit) { + Map source = hit.getSourceAsMap(); + int level = 0; + while (source != null && level < path.length - 1) { + source = getNextLevel(source, path[level]); + level++; + } + if (source != null) { + Object values = source.get(path[level]); + if (values != null) { + if (values instanceof List) { + @SuppressWarnings("unchecked") + List asList = (List) values; + return asList.toArray(new Object[0]); + } else { + return new Object[]{values}; + } + } + } + return new Object[0]; + } + + @SuppressWarnings("unchecked") + private static Map getNextLevel(Map source, String key) { + Object nextLevel = source.get(key); + if (nextLevel instanceof Map) { + return (Map) source.get(key); + } + return null; + } + + @Override + public boolean supportsFromSource() { + return true; + } + + @Override + public ExtractedField newFromSource() { + return this; + } + + @Override + public boolean isMultiField() { + return false; + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/TimeField.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/TimeField.java new file mode 100644 index 0000000000000..7d8e03fab9011 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/extractor/TimeField.java @@ -0,0 +1,73 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +public class TimeField extends AbstractField { + + static final String TYPE = "date"; + + private static final Set TYPES = Collections.singleton(TYPE); + + private static final String EPOCH_MILLIS_FORMAT = "epoch_millis"; + + private final Method method; + + public TimeField(String name, Method method) { + super(name, TYPES); + if (method == Method.SOURCE) { + throw new IllegalArgumentException("time field [" + name + "] cannot be extracted from source"); + } + this.method = Objects.requireNonNull(method); + } + + @Override + public Method getMethod() { + return method; + } + + @Override + public Object[] value(SearchHit hit) { + Object[] value = getFieldValue(hit); + if (value.length != 1) { + return value; + } + if (value[0] instanceof String) { // doc_value field with the epoch_millis format + value[0] = Long.parseLong((String) value[0]); + } else if (value[0] instanceof Long == false) { // pre-6.0 field + throw new IllegalStateException("Unexpected value for a time field: " + value[0].getClass()); + } + return value; + } + + @Override + public String getDocValueFormat() { + if (method != Method.DOC_VALUE) { + throw new UnsupportedOperationException(); + } + return EPOCH_MILLIS_FORMAT; + } + + @Override + public boolean supportsFromSource() { + return false; + } + + @Override + public ExtractedField newFromSource() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isMultiField() { + return false; + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractorTests.java 
b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractorTests.java index 136c045d0b7be..962bd9ee6d3df 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/ScrollDataExtractorTests.java @@ -32,7 +32,9 @@ import org.elasticsearch.xpack.core.ml.datafeed.DatafeedTimingStats; import org.elasticsearch.xpack.ml.datafeed.DatafeedTimingStatsReporter; import org.elasticsearch.xpack.ml.datafeed.DatafeedTimingStatsReporter.DatafeedTimingStatsPersister; +import org.elasticsearch.xpack.ml.extractor.DocValueField; import org.elasticsearch.xpack.ml.extractor.ExtractedField; +import org.elasticsearch.xpack.ml.extractor.TimeField; import org.junit.Before; import org.mockito.ArgumentCaptor; @@ -134,11 +136,9 @@ public void setUpTests() { capturedSearchRequests = new ArrayList<>(); capturedContinueScrollIds = new ArrayList<>(); jobId = "test-job"; - ExtractedField timeField = ExtractedField.newField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); + ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE); extractedFields = new TimeBasedExtractedFields(timeField, - Arrays.asList(timeField, ExtractedField.newField("field_1", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE))); + Arrays.asList(timeField, new DocValueField("field_1", Collections.singleton("keyword")))); indices = Arrays.asList("index-1", "index-2"); query = QueryBuilders.matchAllQuery(); scriptFields = Collections.emptyList(); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessorTests.java index f2070d1aea584..f8d358f36e9d1 100644 --- 
a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/SearchHitToJsonProcessorTests.java @@ -7,8 +7,10 @@ import org.elasticsearch.search.SearchHit; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.extractor.DocValueField; import org.elasticsearch.xpack.ml.extractor.ExtractedField; import org.elasticsearch.xpack.ml.extractor.ExtractedFields; +import org.elasticsearch.xpack.ml.extractor.TimeField; import org.elasticsearch.xpack.ml.test.SearchHitBuilder; import java.io.ByteArrayOutputStream; @@ -22,14 +24,10 @@ public class SearchHitToJsonProcessorTests extends ESTestCase { public void testProcessGivenSingleHit() throws IOException { - ExtractedField timeField = ExtractedField.newField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField missingField = ExtractedField.newField("missing", Collections.singleton("float"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField singleField = ExtractedField.newField("single", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField arrayField = ExtractedField.newField("array", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); + ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE); + ExtractedField missingField = new DocValueField("missing", Collections.singleton("float")); + ExtractedField singleField = new DocValueField("single", Collections.singleton("keyword")); + ExtractedField arrayField = new DocValueField("array", Collections.singleton("keyword")); TimeBasedExtractedFields extractedFields = new TimeBasedExtractedFields(timeField, Arrays.asList(timeField, missingField, singleField, arrayField)); @@ -45,14 +43,10 @@ public void testProcessGivenSingleHit() throws IOException { } public 
void testProcessGivenMultipleHits() throws IOException { - ExtractedField timeField = ExtractedField.newField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField missingField = ExtractedField.newField("missing", Collections.singleton("float"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField singleField = ExtractedField.newField("single", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField arrayField = ExtractedField.newField("array", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); + ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE); + ExtractedField missingField = new DocValueField("missing", Collections.singleton("float")); + ExtractedField singleField = new DocValueField("single", Collections.singleton("keyword")); + ExtractedField arrayField = new DocValueField("array", Collections.singleton("keyword")); TimeBasedExtractedFields extractedFields = new TimeBasedExtractedFields(timeField, Arrays.asList(timeField, missingField, singleField, arrayField)); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFieldsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFieldsTests.java index 34b296cf93fc5..e5958879aaebe 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFieldsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/scroll/TimeBasedExtractedFieldsTests.java @@ -15,7 +15,12 @@ import org.elasticsearch.xpack.core.ml.job.config.DataDescription; import org.elasticsearch.xpack.core.ml.job.config.Detector; import org.elasticsearch.xpack.core.ml.job.config.Job; +import org.elasticsearch.xpack.ml.extractor.DocValueField; import 
org.elasticsearch.xpack.ml.extractor.ExtractedField; +import org.elasticsearch.xpack.ml.extractor.ExtractedFields; +import org.elasticsearch.xpack.ml.extractor.ScriptField; +import org.elasticsearch.xpack.ml.extractor.SourceField; +import org.elasticsearch.xpack.ml.extractor.TimeField; import org.elasticsearch.xpack.ml.test.SearchHitBuilder; import java.util.Arrays; @@ -30,8 +35,7 @@ public class TimeBasedExtractedFieldsTests extends ESTestCase { - private ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); + private ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE); public void testInvalidConstruction() { expectThrows(IllegalArgumentException.class, () -> new TimeBasedExtractedFields(timeField, Collections.emptyList())); @@ -48,18 +52,12 @@ public void testTimeFieldOnly() { } public void testAllTypesOfFields() { - ExtractedField docValue1 = ExtractedField.newField("doc1", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField docValue2 = ExtractedField.newField("doc2", Collections.singleton("float"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField scriptField1 = ExtractedField.newField("scripted1", Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD); - ExtractedField scriptField2 = ExtractedField.newField("scripted2", Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD); - ExtractedField sourceField1 = ExtractedField.newField("src1", Collections.singleton("text"), - ExtractedField.ExtractionMethod.SOURCE); - ExtractedField sourceField2 = ExtractedField.newField("src2", Collections.singleton("text"), - ExtractedField.ExtractionMethod.SOURCE); + ExtractedField docValue1 = new DocValueField("doc1", Collections.singleton("keyword")); + ExtractedField docValue2 = new DocValueField("doc2", Collections.singleton("float")); + ExtractedField scriptField1 = new 
ScriptField("scripted1"); + ExtractedField scriptField2 = new ScriptField("scripted2"); + ExtractedField sourceField1 = new SourceField("src1", Collections.singleton("text")); + ExtractedField sourceField2 = new SourceField("src2", Collections.singleton("text")); TimeBasedExtractedFields extractedFields = new TimeBasedExtractedFields(timeField, Arrays.asList(timeField, docValue1, docValue2, scriptField1, scriptField2, sourceField1, sourceField2)); @@ -182,12 +180,9 @@ public void testBuildGivenMultiFields() { assertThat(extractedFields.getSourceFields()[0], equalTo("airline")); assertThat(extractedFields.getAllFields().size(), equalTo(3)); - assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("time")).findFirst().get().getAlias(), - equalTo("time")); - assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airport.keyword")).findFirst().get().getAlias(), - equalTo("airport.keyword")); - assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airline")).findFirst().get().getAlias(), - equalTo("airline.text")); + assertThat(findField("time", extractedFields).getSearchField(), equalTo("time")); + assertThat(findField("airport.keyword", extractedFields).getSearchField(), equalTo("airport.keyword")); + assertThat(findField("airline.text", extractedFields).getSearchField(), equalTo("airline")); } public void testBuildGivenTimeFieldIsNotAggregatable() { @@ -253,4 +248,8 @@ private static FieldCapabilities createFieldCaps(boolean isAggregatable) { when(fieldCaps.isAggregatable()).thenReturn(isAggregatable); return fieldCaps; } + + private static ExtractedField findField(String name, ExtractedFields fields) { + return fields.getAllFields().stream().filter(f -> f.getName().equals(name)).findFirst().get(); + } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorTests.java 
b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorTests.java index 0915afcc75dfb..fe9dc68755d6e 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorTests.java @@ -27,8 +27,9 @@ import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification; import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetectionTests; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression; -import org.elasticsearch.xpack.ml.extractor.ExtractedField; +import org.elasticsearch.xpack.ml.extractor.DocValueField; import org.elasticsearch.xpack.ml.extractor.ExtractedFields; +import org.elasticsearch.xpack.ml.extractor.SourceField; import org.elasticsearch.xpack.ml.test.SearchHitBuilder; import org.junit.Before; import org.mockito.ArgumentCaptor; @@ -78,8 +79,8 @@ public void setUpTests() { indices = Arrays.asList("index-1", "index-2"); query = QueryBuilders.matchAllQuery(); extractedFields = new ExtractedFields(Arrays.asList( - ExtractedField.newField("field_1", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_2", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE))); + new DocValueField("field_1", Collections.singleton("keyword")), + new DocValueField("field_2", Collections.singleton("keyword")))); scrollSize = 1000; headers = Collections.emptyMap(); @@ -295,8 +296,8 @@ public void testIncludeSourceIsFalseAndNoSourceFields() throws IOException { public void testIncludeSourceIsFalseAndAtLeastOneSourceField() throws IOException { extractedFields = new ExtractedFields(Arrays.asList( - ExtractedField.newField("field_1", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_2", 
Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE))); + new DocValueField("field_1", Collections.singleton("keyword")), + new SourceField("field_2", Collections.singleton("text")))); TestExtractor dataExtractor = createExtractor(false, false); @@ -391,15 +392,15 @@ public void testMissingValues_GivenShouldInclude() throws IOException { public void testGetCategoricalFields() { extractedFields = new ExtractedFields(Arrays.asList( - ExtractedField.newField("field_boolean", Collections.singleton("boolean"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_float", Collections.singleton("float"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_double", Collections.singleton("double"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_byte", Collections.singleton("byte"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_short", Collections.singleton("short"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_integer", Collections.singleton("integer"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_long", Collections.singleton("long"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_keyword", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE), - ExtractedField.newField("field_text", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE))); + new DocValueField("field_boolean", Collections.singleton("boolean")), + new DocValueField("field_float", Collections.singleton("float")), + new DocValueField("field_double", Collections.singleton("double")), + new DocValueField("field_byte", Collections.singleton("byte")), + new DocValueField("field_short", Collections.singleton("short")), + new DocValueField("field_integer", Collections.singleton("integer")), + new DocValueField("field_long", Collections.singleton("long")), + new 
DocValueField("field_keyword", Collections.singleton("keyword")), + new SourceField("field_text", Collections.singleton("text")))); TestExtractor dataExtractor = createExtractor(true, true); assertThat(dataExtractor.getCategoricalFields(OutlierDetectionTests.createRandom()), empty()); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java index 053620d14cbaf..ce819e9e6d84a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java @@ -53,7 +53,7 @@ public void testDetect_GivenFloatField() { List allFields = extractedFields.getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.get(0).getName(), equalTo("some_float")); - assertThat(allFields.get(0).getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)); + assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); } public void testDetect_GivenNumericFieldWithMultipleTypes() { @@ -68,7 +68,7 @@ public void testDetect_GivenNumericFieldWithMultipleTypes() { List allFields = extractedFields.getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.get(0).getName(), equalTo("some_number")); - assertThat(allFields.get(0).getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)); + assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); } public void testDetect_GivenOutlierDetectionAndNonNumericField() { @@ -111,8 +111,8 @@ public void testDetect_GivenOutlierDetectionAndMultipleFields() { assertThat(allFields.size(), equalTo(3)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()), 
containsInAnyOrder("some_float", "some_long", "some_boolean")); - assertThat(allFields.stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()), - contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE))); + assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + contains(equalTo(ExtractedField.Method.DOC_VALUE))); } public void testDetect_GivenRegressionAndMultipleFields() { @@ -132,8 +132,8 @@ public void testDetect_GivenRegressionAndMultipleFields() { assertThat(allFields.size(), equalTo(5)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), containsInAnyOrder("foo", "some_float", "some_keyword", "some_long", "some_boolean")); - assertThat(allFields.stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()), - contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE))); + assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + contains(equalTo(ExtractedField.Method.DOC_VALUE))); } public void testDetect_GivenRegressionAndRequiredFieldMissing() { @@ -442,8 +442,8 @@ public void testDetect_GivenLessFieldsThanDocValuesLimit() { List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()), - contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE))); + assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + contains(equalTo(ExtractedField.Method.DOC_VALUE))); } public void testDetect_GivenEqualFieldsToDocValuesLimit() { @@ -461,8 +461,8 @@ public void testDetect_GivenEqualFieldsToDocValuesLimit() { List extractedFieldNames = 
extractedFields.getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()), - contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE))); + assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + contains(equalTo(ExtractedField.Method.DOC_VALUE))); } public void testDetect_GivenMoreFieldsThanDocValuesLimit() { @@ -480,8 +480,8 @@ public void testDetect_GivenMoreFieldsThanDocValuesLimit() { List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()), - contains(equalTo(ExtractedField.ExtractionMethod.SOURCE))); + assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + contains(equalTo(ExtractedField.Method.SOURCE))); } public void testDetect_GivenBooleanField_BooleanMappedAsInteger() { @@ -497,7 +497,7 @@ public void testDetect_GivenBooleanField_BooleanMappedAsInteger() { assertThat(allFields.size(), equalTo(1)); ExtractedField booleanField = allFields.get(0); assertThat(booleanField.getTypes(), contains("boolean")); - assertThat(booleanField.getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)); + assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build(); assertThat(booleanField.value(hit), arrayContaining(1)); @@ -522,7 +522,7 @@ public void testDetect_GivenBooleanField_BooleanMappedAsString() { 
assertThat(allFields.size(), equalTo(1)); ExtractedField booleanField = allFields.get(0); assertThat(booleanField.getTypes(), contains("boolean")); - assertThat(booleanField.getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)); + assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build(); assertThat(booleanField.value(hit), arrayContaining("true")); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/DocValueFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/DocValueFieldTests.java new file mode 100644 index 0000000000000..4cb8c4126c095 --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/DocValueFieldTests.java @@ -0,0 +1,79 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; + +import java.util.Arrays; +import java.util.Collections; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class DocValueFieldTests extends ESTestCase { + + public void testKeyword() { + SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build(); + + ExtractedField field = new DocValueField("a_keyword", Collections.singleton("keyword")); + + assertThat(field.value(hit), equalTo(new String[] { "bar" })); + assertThat(field.getName(), equalTo("a_keyword")); + assertThat(field.getSearchField(), equalTo("a_keyword")); + assertThat(field.getTypes(), contains("keyword")); + assertThat(field.getDocValueFormat(), is(nullValue())); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertThat(field.supportsFromSource(), is(true)); + assertThat(field.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> field.getParentField()); + } + + public void testKeywordArray() { + SearchHit hit = new SearchHitBuilder(42).addField("array", Arrays.asList("a", "b")).build(); + + ExtractedField field = new DocValueField("array", Collections.singleton("keyword")); + + assertThat(field.value(hit), equalTo(new String[] { "a", "b" })); + assertThat(field.getName(), equalTo("array")); + assertThat(field.getSearchField(), equalTo("array")); + assertThat(field.getTypes(), contains("keyword")); + assertThat(field.getDocValueFormat(), is(nullValue())); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertThat(field.supportsFromSource(), is(true)); + assertThat(field.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> 
field.getParentField()); + + ExtractedField missing = new DocValueField("missing", Collections.singleton("keyword")); + assertThat(missing.value(hit), equalTo(new Object[0])); + } + + public void testMissing() { + SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build(); + + ExtractedField missing = new DocValueField("missing", Collections.singleton("keyword")); + + assertThat(missing.value(hit), equalTo(new Object[0])); + } + + public void testNewFromSource() { + ExtractedField field = new DocValueField("foo", Collections.singleton("keyword")); + + ExtractedField fromSource = field.newFromSource(); + + assertThat(fromSource.getName(), equalTo("foo")); + assertThat(fromSource.getSearchField(), equalTo("foo")); + assertThat(fromSource.getTypes(), contains("keyword")); + expectThrows(UnsupportedOperationException.class, () -> fromSource.getDocValueFormat()); + assertThat(fromSource.getMethod(), equalTo(ExtractedField.Method.SOURCE)); + assertThat(fromSource.supportsFromSource(), is(true)); + assertThat(fromSource.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> fromSource.getParentField()); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldTests.java deleted file mode 100644 index 92d82ead05bca..0000000000000 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldTests.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ -package org.elasticsearch.xpack.ml.extractor; - -import org.elasticsearch.search.SearchHit; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.ml.test.SearchHitBuilder; - -import java.util.Arrays; -import java.util.Collections; - -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.startsWith; - -public class ExtractedFieldTests extends ESTestCase { - - public void testValueGivenDocValue() { - SearchHit hit = new SearchHitBuilder(42).addField("single", "bar").addField("array", Arrays.asList("a", "b")).build(); - - ExtractedField single = ExtractedField.newField("single", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(single.value(hit), equalTo(new String[] { "bar" })); - - ExtractedField array = ExtractedField.newField("array", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(array.value(hit), equalTo(new String[] { "a", "b" })); - - ExtractedField missing = ExtractedField.newField("missing",Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(missing.value(hit), equalTo(new Object[0])); - } - - public void testValueGivenScriptField() { - SearchHit hit = new SearchHitBuilder(42).addField("single", "bar").addField("array", Arrays.asList("a", "b")).build(); - - ExtractedField single = ExtractedField.newField("single",Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD); - assertThat(single.value(hit), equalTo(new String[] { "bar" })); - - ExtractedField array = ExtractedField.newField("array", Collections.emptySet(), ExtractedField.ExtractionMethod.SCRIPT_FIELD); - assertThat(array.value(hit), equalTo(new String[] { "a", "b" })); - - ExtractedField missing = ExtractedField.newField("missing", Collections.emptySet(), ExtractedField.ExtractionMethod.SCRIPT_FIELD); - assertThat(missing.value(hit), equalTo(new Object[0])); - } - - public void testValueGivenSource() 
{ - SearchHit hit = new SearchHitBuilder(42).setSource("{\"single\":\"bar\",\"array\":[\"a\",\"b\"]}").build(); - - ExtractedField single = ExtractedField.newField("single", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(single.value(hit), equalTo(new String[] { "bar" })); - - ExtractedField array = ExtractedField.newField("array", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(array.value(hit), equalTo(new String[] { "a", "b" })); - - ExtractedField missing = ExtractedField.newField("missing", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(missing.value(hit), equalTo(new Object[0])); - } - - public void testValueGivenNestedSource() { - SearchHit hit = new SearchHitBuilder(42).setSource("{\"level_1\":{\"level_2\":{\"foo\":\"bar\"}}}").build(); - - ExtractedField nested = ExtractedField.newField("alias", "level_1.level_2.foo", Collections.singleton("text"), - ExtractedField.ExtractionMethod.SOURCE); - assertThat(nested.value(hit), equalTo(new String[] { "bar" })); - } - - public void testGeoPoint() { - double lat = 38.897676; - double lon = -77.03653; - String[] expected = new String[] {lat + "," + lon}; - - // doc_value field - ExtractedField geo = ExtractedField.newGeoPointField("geo", "geo"); - SearchHit hit = new SearchHitBuilder(42).addField("geo", lat + ", " + lon).build(); - assertThat(geo.value(hit), equalTo(expected)); - } - - public void testGeoShape() { - double lat = 38.897676; - double lon = -77.03653; - String[] expected = new String[] {lat + "," + lon}; - // object format - SearchHit hit = new SearchHitBuilder(42) - .setSource("{\"geo\":{\"type\":\"point\", \"coordinates\": [" + lon + ", " + lat + "]}}") - .build(); - ExtractedField geo = ExtractedField.newGeoShapeField("geo", "geo"); - assertThat(geo.value(hit), equalTo(expected)); - - // WKT format - hit = new SearchHitBuilder(42).setSource("{\"geo\":\"POINT ("+ lon + " " + lat + 
")\"}").build(); - geo = ExtractedField.newGeoShapeField("geo", "geo"); - assertThat(geo.value(hit), equalTo(expected)); - } - - public void testValueGivenSourceAndHitWithNoSource() { - ExtractedField missing = ExtractedField.newField("missing", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(missing.value(new SearchHitBuilder(3).build()), equalTo(new Object[0])); - } - - public void testValueGivenMismatchingMethod() { - SearchHit hit = new SearchHitBuilder(42).addField("a", 1).setSource("{\"b\":2}").build(); - - ExtractedField invalidA = ExtractedField.newField("a", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(invalidA.value(hit), equalTo(new Object[0])); - ExtractedField validA = ExtractedField.newField("a", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(validA.value(hit), equalTo(new Integer[] { 1 })); - - ExtractedField invalidB = ExtractedField.newField("b", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(invalidB.value(hit), equalTo(new Object[0])); - ExtractedField validB = ExtractedField.newField("b", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(validB.value(hit), equalTo(new Integer[] { 2 })); - } - - public void testValueGivenEmptyHit() { - SearchHit hit = new SearchHitBuilder(42).build(); - - ExtractedField docValue = ExtractedField.newField("a", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE); - assertThat(docValue.value(hit), equalTo(new Object[0])); - - ExtractedField sourceField = ExtractedField.newField("b", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(sourceField.value(hit), equalTo(new Object[0])); - } - - public void testNewTimeFieldGivenSource() { - expectThrows(IllegalArgumentException.class, () -> ExtractedField.newTimeField("time", Collections.singleton("date"), - 
ExtractedField.ExtractionMethod.SOURCE)); - } - - public void testValueGivenStringTimeField() { - final long millis = randomLong(); - final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", Long.toString(millis)).build(); - final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(timeField.value(hit), equalTo(new Object[] { millis })); - } - - public void testValueGivenLongTimeField() { - final long millis = randomLong(); - final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", millis).build(); - final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(timeField.value(hit), equalTo(new Object[] { millis })); - } - - public void testValueGivenPre6xTimeField() { - // Prior to 6.x, timestamps were simply `long` milliseconds-past-the-epoch values - final long millis = randomLong(); - final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", millis).build(); - final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(timeField.value(hit), equalTo(new Object[] { millis })); - } - - public void testValueGivenUnknownFormatTimeField() { - final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", new Object()).build(); - final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(expectThrows(IllegalStateException.class, () -> timeField.value(hit)).getMessage(), - startsWith("Unexpected value for a time field")); - } - - public void testAliasVersusName() { - SearchHit hit = new SearchHitBuilder(42).addField("a", 1).addField("b", 2).build(); - - ExtractedField field = ExtractedField.newField("a", "a", 
Collections.singleton("int"), - ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(field.getAlias(), equalTo("a")); - assertThat(field.getName(), equalTo("a")); - assertThat(field.value(hit), equalTo(new Integer[] { 1 })); - - hit = new SearchHitBuilder(42).addField("a", 1).addField("b", 2).build(); - - field = ExtractedField.newField("a", "b", Collections.singleton("int"), ExtractedField.ExtractionMethod.DOC_VALUE); - assertThat(field.getAlias(), equalTo("a")); - assertThat(field.getName(), equalTo("b")); - assertThat(field.value(hit), equalTo(new Integer[] { 2 })); - } - - public void testGetDocValueFormat() { - for (ExtractedField.ExtractionMethod method : ExtractedField.ExtractionMethod.values()) { - assertThat(ExtractedField.newField("f", Collections.emptySet(), method).getDocValueFormat(), equalTo(null)); - } - assertThat(ExtractedField.newTimeField("doc_value_time", Collections.singleton("date"), - ExtractedField.ExtractionMethod.DOC_VALUE).getDocValueFormat(), equalTo("epoch_millis")); - assertThat(ExtractedField.newTimeField("source_time", Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD).getDocValueFormat(), equalTo("epoch_millis")); - } -} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldsTests.java index cee2a7154cf25..9613d14fb5f00 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ExtractedFieldsTests.java @@ -7,12 +7,9 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; +import org.elasticsearch.search.SearchHit; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig; -import 
org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig; -import org.elasticsearch.xpack.core.ml.job.config.DataDescription; -import org.elasticsearch.xpack.core.ml.job.config.Detector; -import org.elasticsearch.xpack.core.ml.job.config.Job; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; import java.util.Arrays; import java.util.Collections; @@ -21,24 +18,19 @@ import java.util.Map; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; public class ExtractedFieldsTests extends ESTestCase { public void testAllTypesOfFields() { - ExtractedField docValue1 = ExtractedField.newField("doc1", Collections.singleton("keyword"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField docValue2 = ExtractedField.newField("doc2", Collections.singleton("ip"), - ExtractedField.ExtractionMethod.DOC_VALUE); - ExtractedField scriptField1 = ExtractedField.newField("scripted1", Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD); - ExtractedField scriptField2 = ExtractedField.newField("scripted2", Collections.emptySet(), - ExtractedField.ExtractionMethod.SCRIPT_FIELD); - ExtractedField sourceField1 = ExtractedField.newField("src1", Collections.singleton("text"), - ExtractedField.ExtractionMethod.SOURCE); - ExtractedField sourceField2 = ExtractedField.newField("src2", Collections.singleton("text"), - ExtractedField.ExtractionMethod.SOURCE); + ExtractedField docValue1 = new DocValueField("doc1", Collections.singleton("keyword")); + ExtractedField docValue2 = new DocValueField("doc2", Collections.singleton("ip")); + ExtractedField scriptField1 = new ScriptField("scripted1"); + ExtractedField scriptField2 = new ScriptField("scripted2"); + ExtractedField sourceField1 = new SourceField("src1", Collections.singleton("text")); + ExtractedField sourceField2 = new SourceField("src2", Collections.singleton("text")); ExtractedFields 
extractedFields = new ExtractedFields(Arrays.asList( docValue1, docValue2, scriptField1, scriptField2, sourceField1, sourceField2)); @@ -74,16 +66,6 @@ public void testBuildGivenMixtureOfTypes() { } public void testBuildGivenMultiFields() { - Job.Builder jobBuilder = new Job.Builder("foo"); - jobBuilder.setDataDescription(new DataDescription.Builder()); - Detector.Builder detector = new Detector.Builder("count", null); - detector.setByFieldName("airline.text"); - detector.setOverFieldName("airport.keyword"); - jobBuilder.setAnalysisConfig(new AnalysisConfig.Builder(Collections.singletonList(detector.build()))); - - DatafeedConfig.Builder datafeedBuilder = new DatafeedConfig.Builder("feed", jobBuilder.getId()); - datafeedBuilder.setIndices(Collections.singletonList("foo")); - Map text = new HashMap<>(); text.put("text", createFieldCaps(false)); Map keyword = new HashMap<>(); @@ -103,10 +85,34 @@ public void testBuildGivenMultiFields() { assertThat(extractedFields.getSourceFields()[0], equalTo("airline")); assertThat(extractedFields.getAllFields().size(), equalTo(2)); - assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airport.keyword")).findFirst().get().getAlias(), - equalTo("airport.keyword")); - assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airline")).findFirst().get().getAlias(), - equalTo("airline.text")); + ExtractedField airlineField = extractedFields.getAllFields().get(0); + assertThat(airlineField.isMultiField(), is(true)); + assertThat(airlineField.getName(), equalTo("airline.text")); + assertThat(airlineField.getSearchField(), equalTo("airline")); + assertThat(airlineField.getParentField(), equalTo("airline")); + + ExtractedField airportField = extractedFields.getAllFields().get(1); + assertThat(airportField.isMultiField(), is(true)); + assertThat(airportField.getName(), equalTo("airport.keyword")); + assertThat(airportField.getSearchField(), equalTo("airport.keyword")); + 
assertThat(airportField.getParentField(), equalTo("airport")); + } + + public void testApplyBooleanMapping() { + DocValueField aBool = new DocValueField("a_bool", Collections.singleton("boolean")); + + ExtractedField mapped = ExtractedFields.applyBooleanMapping(aBool, 1, 0); + + SearchHit hitTrue = new SearchHitBuilder(42).addField("a_bool", true).build(); + SearchHit hitFalse = new SearchHitBuilder(42).addField("a_bool", false).build(); + + assertThat(mapped.value(hitTrue), equalTo(new Integer[] { 1 })); + assertThat(mapped.value(hitFalse), equalTo(new Integer[] { 0 })); + + assertThat(mapped.getName(), equalTo(aBool.getName())); + assertThat(mapped.getMethod(), equalTo(aBool.getMethod())); + assertThat(mapped.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> mapped.newFromSource()); } public void testBuildGivenFieldWithoutMappings() { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoPointFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoPointFieldTests.java new file mode 100644 index 0000000000000..bd9b00254930d --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoPointFieldTests.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+package org.elasticsearch.xpack.ml.extractor;
+
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
+
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.nullValue;
+
+public class GeoPointFieldTests extends ESTestCase {
+
+    public void testGivenGeoPoint() {
+        double lat = 38.897676;
+        double lon = -77.03653;
+        String[] expected = new String[] {lat + "," + lon};
+        SearchHit hit = new SearchHitBuilder(42).addField("geo", lat + ", " + lon).build();
+
+        // The extraction method asserted below is DOC_VALUE, which is why the hit carries "geo" via addField
+        ExtractedField geo = new GeoPointField("geo");
+
+        assertThat(geo.value(hit), equalTo(expected));
+        assertThat(geo.getName(), equalTo("geo"));
+        assertThat(geo.getSearchField(), equalTo("geo"));
+        assertThat(geo.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
+        assertThat(geo.getTypes(), contains("geo_point"));
+        assertThat(geo.getDocValueFormat(), is(nullValue()));
+        assertThat(geo.supportsFromSource(), is(false));
+        expectThrows(UnsupportedOperationException.class, () -> geo.newFromSource());
+        assertThat(geo.isMultiField(), is(false));
+        expectThrows(UnsupportedOperationException.class, () -> geo.getParentField());
+    }
+}
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoShapeFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoShapeFieldTests.java
new file mode 100644
index 0000000000000..79ac9849375cc
--- /dev/null
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/GeoShapeFieldTests.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.sameInstance; + +public class GeoShapeFieldTests extends ESTestCase { + + public void testObjectFormat() { + double lat = 38.897676; + double lon = -77.03653; + String[] expected = new String[] {lat + "," + lon}; + + SearchHit hit = new SearchHitBuilder(42) + .setSource("{\"geo\":{\"type\":\"point\", \"coordinates\": [" + lon + ", " + lat + "]}}") + .build(); + + ExtractedField geo = new GeoShapeField("geo"); + + assertThat(geo.value(hit), equalTo(expected)); + assertThat(geo.getName(), equalTo("geo")); + assertThat(geo.getSearchField(), equalTo("geo")); + assertThat(geo.getTypes(), contains("geo_shape")); + assertThat(geo.getMethod(), equalTo(ExtractedField.Method.SOURCE)); + assertThat(geo.supportsFromSource(), is(true)); + assertThat(geo.newFromSource(), sameInstance(geo)); + expectThrows(UnsupportedOperationException.class, () -> geo.getDocValueFormat()); + assertThat(geo.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> geo.getParentField()); + } + + public void testWKTFormat() { + double lat = 38.897676; + double lon = -77.03653; + String[] expected = new String[] {lat + "," + lon}; + + SearchHit hit = new SearchHitBuilder(42).setSource("{\"geo\":\"POINT ("+ lon + " " + lat + ")\"}").build(); + + ExtractedField geo = new GeoShapeField("geo"); + + assertThat(geo.value(hit), equalTo(expected)); + assertThat(geo.getName(), equalTo("geo")); + assertThat(geo.getSearchField(), equalTo("geo")); + assertThat(geo.getTypes(), contains("geo_shape")); + assertThat(geo.getMethod(), equalTo(ExtractedField.Method.SOURCE)); + assertThat(geo.supportsFromSource(), is(true)); + 
assertThat(geo.newFromSource(), sameInstance(geo)); + expectThrows(UnsupportedOperationException.class, () -> geo.getDocValueFormat()); + assertThat(geo.isMultiField(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> geo.getParentField()); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/MultiFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/MultiFieldTests.java new file mode 100644 index 0000000000000..ddbae4a3ef236 --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/MultiFieldTests.java @@ -0,0 +1,50 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.extractor; + +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.ml.test.SearchHitBuilder; + +import java.util.Collections; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +public class MultiFieldTests extends ESTestCase { + + public void testGivenSameSearchField() { + SearchHit hit = new SearchHitBuilder(42).addField("a", 1).addField("a.b", 2).build(); + + ExtractedField wrapped = new DocValueField("a.b", Collections.singleton("integer")); + ExtractedField field = new MultiField("a", wrapped); + + assertThat(field.value(hit), equalTo(new Integer[] { 2 })); + assertThat(field.getName(), equalTo("a.b")); + assertThat(field.getSearchField(), equalTo("a.b")); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertThat(field.isMultiField(), is(true)); + assertThat(field.getParentField(), equalTo("a")); + assertThat(field.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> 
field.newFromSource()); + } + + public void testGivenDifferentSearchField() { + SearchHit hit = new SearchHitBuilder(42).addField("a", 1).addField("a.b", 2).build(); + + ExtractedField wrapped = new DocValueField("a", Collections.singleton("integer")); + ExtractedField field = new MultiField("a.b", "a", "a", wrapped); + + assertThat(field.value(hit), equalTo(new Integer[] { 1 })); + assertThat(field.getName(), equalTo("a.b")); + assertThat(field.getSearchField(), equalTo("a")); + assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertThat(field.isMultiField(), is(true)); + assertThat(field.getParentField(), equalTo("a")); + assertThat(field.supportsFromSource(), is(false)); + expectThrows(UnsupportedOperationException.class, () -> field.newFromSource()); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ScriptFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ScriptFieldTests.java new file mode 100644 index 0000000000000..48629ec4a9fd6 --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/ScriptFieldTests.java @@ -0,0 +1,64 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */
+package org.elasticsearch.xpack.ml.extractor;
+
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
+
+import java.util.Arrays;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+
+public class ScriptFieldTests extends ESTestCase {
+
+    public void testKeyword() {
+        SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build();
+
+        ExtractedField field = new ScriptField("a_keyword");
+
+        assertThat(field.value(hit), equalTo(new String[] { "bar" }));
+        assertThat(field.getName(), equalTo("a_keyword"));
+        assertThat(field.getSearchField(), equalTo("a_keyword"));
+        assertThat(field.getTypes().isEmpty(), is(true));
+        expectThrows(UnsupportedOperationException.class, () -> field.getDocValueFormat());
+        assertThat(field.getMethod(), equalTo(ExtractedField.Method.SCRIPT_FIELD));
+        expectThrows(UnsupportedOperationException.class, () -> field.getParentField());
+        assertThat(field.isMultiField(), is(false));
+        assertThat(field.supportsFromSource(), is(false));
+        expectThrows(UnsupportedOperationException.class, () -> field.newFromSource());
+    }
+
+    public void testKeywordArray() {
+        SearchHit hit = new SearchHitBuilder(42).addField("array", Arrays.asList("a", "b")).build();
+
+        ExtractedField field = new ScriptField("array");
+
+        assertThat(field.value(hit), equalTo(new String[] { "a", "b" }));
+        assertThat(field.getName(), equalTo("array"));
+        assertThat(field.getSearchField(), equalTo("array"));
+        assertThat(field.getTypes().isEmpty(), is(true));
+        expectThrows(UnsupportedOperationException.class, () -> field.getDocValueFormat());
+        assertThat(field.getMethod(), equalTo(ExtractedField.Method.SCRIPT_FIELD));
+        expectThrows(UnsupportedOperationException.class, () -> field.getParentField());
+        assertThat(field.isMultiField(), is(false));
+        assertThat(field.supportsFromSource(), is(false));
+        expectThrows(UnsupportedOperationException.class, () -> field.newFromSource());
+
+        // Exercise the class under test: a script field absent from this hit must yield an empty array
+        ExtractedField missing = new ScriptField("missing");
+        assertThat(missing.value(hit), equalTo(new Object[0]));
+    }
+
+    public void testMissing() {
+        SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build();
+
+        ExtractedField missing = new ScriptField("missing");
+
+        assertThat(missing.value(hit), equalTo(new Object[0]));
+    }
+}
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/SourceFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/SourceFieldTests.java
new file mode 100644
index 0000000000000..03fed7790a358
--- /dev/null
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/SourceFieldTests.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.extractor;
+
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
+
+import java.util.Collections;
+
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.sameInstance;
+
+public class SourceFieldTests extends ESTestCase {
+
+    /** A scalar in the hit source comes back as a one-element array; the field is its own from-source form. */
+    public void testSingleValue() {
+        SearchHit searchHit = new SearchHitBuilder(42).setSource("{\"single\":\"bar\"}").build();
+        ExtractedField sourceField = new SourceField("single", Collections.singleton("text"));
+
+        assertThat(sourceField.value(searchHit), equalTo(new String[] { "bar" }));
+        assertThat(sourceField.getName(), equalTo("single"));
+        assertThat(sourceField.getSearchField(), equalTo("single"));
+        assertThat(sourceField.getTypes(), contains("text"));
+        assertThat(sourceField.getMethod(), equalTo(ExtractedField.Method.SOURCE));
+        assertThat(sourceField.supportsFromSource(), is(true));
+        assertThat(sourceField.newFromSource(), sameInstance(sourceField));
+        assertThat(sourceField.isMultiField(), is(false));
+        expectThrows(UnsupportedOperationException.class, sourceField::getParentField);
+        expectThrows(UnsupportedOperationException.class, sourceField::getDocValueFormat);
+    }
+
+    /** A JSON array in the hit source comes back with all of its elements. */
+    public void testArray() {
+        SearchHit searchHit = new SearchHitBuilder(42).setSource("{\"array\":[\"a\",\"b\"]}").build();
+        ExtractedField sourceField = new SourceField("array", Collections.singleton("text"));
+
+        assertThat(sourceField.value(searchHit), equalTo(new String[] { "a", "b" }));
+        assertThat(sourceField.getName(), equalTo("array"));
+        assertThat(sourceField.getSearchField(), equalTo("array"));
+        assertThat(sourceField.getTypes(), contains("text"));
+        assertThat(sourceField.getMethod(), equalTo(ExtractedField.Method.SOURCE));
+        assertThat(sourceField.supportsFromSource(), is(true));
+        assertThat(sourceField.newFromSource(), sameInstance(sourceField));
+        assertThat(sourceField.isMultiField(), is(false));
+        expectThrows(UnsupportedOperationException.class, sourceField::getParentField);
+        expectThrows(UnsupportedOperationException.class, sourceField::getDocValueFormat);
+    }
+
+    /** A field name that does not appear in the hit source yields an empty array. */
+    public void testMissing() {
+        SearchHit searchHit = new SearchHitBuilder(42).setSource("{\"array\":[\"a\",\"b\"]}").build();
+        ExtractedField absentField = new SourceField("missing", Collections.singleton("text"));
+
+        assertThat(absentField.value(searchHit), equalTo(new Object[0]));
+    }
+
+    /** A dotted field name is resolved through nested objects in the hit source. */
+    public void testValueGivenNested() {
+        SearchHit searchHit = new SearchHitBuilder(42).setSource("{\"level_1\":{\"level_2\":{\"foo\":\"bar\"}}}").build();
+        ExtractedField nestedField = new SourceField("level_1.level_2.foo", Collections.singleton("text"));
+
+        assertThat(nestedField.value(searchHit), equalTo(new String[] { "bar" }));
+    }
+}
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/TimeFieldTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/TimeFieldTests.java
new file mode 100644
index 0000000000000..e2898703d3460
--- /dev/null
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/extractor/TimeFieldTests.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.extractor;
+
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
+
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.startsWith;
+
+public class TimeFieldTests extends ESTestCase {
+
+    /** A DOC_VALUE time field turns a stringified long into the numeric value and reports the epoch_millis format. */
+    public void testDocValueWithStringValue() {
+        long epochMillis = randomLong();
+        SearchHit searchHit = new SearchHitBuilder(randomInt()).addField("time", Long.toString(epochMillis)).build();
+        ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE);
+
+        assertThat(timeField.value(searchHit), equalTo(new Object[] { epochMillis }));
+        assertThat(timeField.getName(), equalTo("time"));
+        assertThat(timeField.getSearchField(), equalTo("time"));
+        assertThat(timeField.getTypes(), contains("date"));
+        assertThat(timeField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
+        assertThat(timeField.getDocValueFormat(), equalTo("epoch_millis"));
+        assertThat(timeField.supportsFromSource(), is(false));
+        expectThrows(UnsupportedOperationException.class, timeField::newFromSource);
+        assertThat(timeField.isMultiField(), is(false));
+        expectThrows(UnsupportedOperationException.class, timeField::getParentField);
+    }
+
+    /** A SCRIPT_FIELD time field passes a long value through and rejects requests for a doc value format. */
+    public void testScriptWithLongValue() {
+        long epochMillis = randomLong();
+        SearchHit searchHit = new SearchHitBuilder(randomInt()).addField("time", epochMillis).build();
+        ExtractedField timeField = new TimeField("time", ExtractedField.Method.SCRIPT_FIELD);
+
+        assertThat(timeField.value(searchHit), equalTo(new Object[] { epochMillis }));
+        assertThat(timeField.getName(), equalTo("time"));
+        assertThat(timeField.getSearchField(), equalTo("time"));
+        assertThat(timeField.getTypes(), contains("date"));
+        assertThat(timeField.getMethod(), equalTo(ExtractedField.Method.SCRIPT_FIELD));
+        expectThrows(UnsupportedOperationException.class, timeField::getDocValueFormat);
+        assertThat(timeField.supportsFromSource(), is(false));
+        expectThrows(UnsupportedOperationException.class, timeField::newFromSource);
+        assertThat(timeField.isMultiField(), is(false));
+        expectThrows(UnsupportedOperationException.class, timeField::getParentField);
+    }
+
+    /** A hit value that is neither a string nor a long makes value extraction fail with an IllegalStateException. */
+    public void testUnknownFormat() {
+        final SearchHit searchHit = new SearchHitBuilder(randomInt()).addField("time", new Object()).build();
+        final ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE);
+        IllegalStateException e = expectThrows(IllegalStateException.class, () -> timeField.value(searchHit));
+        assertThat(e.getMessage(), startsWith("Unexpected value for a time field"));
+    }
+
+    /** Constructing a time field with the SOURCE extraction method is rejected. */
+    public void testSourceNotSupported() {
+        expectThrows(IllegalArgumentException.class, () -> new TimeField("foo", ExtractedField.Method.SOURCE));
+    }
+}