[7.x][ML] Prevent fetching multi-field from source (#48770) (#48797)

Aggregatable mutli-fields are at the moment wrongly mapped
as normal doc_value fields and thus they support fetching from
source. However, they do not exist in the source. This results
to failure to extract such fields.

This commit fixes this bug. While a fix could be worked out
on top of the existing code, it is evident the extraction logic
has become difficult to understand and maintain. As we also
want to deduplicate multi-fields for data frame analytics,
it seemed appropriate to refactor the code to simplify and
better handle the extraction of multi-fields.

Relates #48756

Backport of #48770
This commit is contained in:
Dimitris Athanasiou 2019-11-01 14:18:03 +02:00 committed by GitHub
parent e1e9b23db8
commit 1f662e0b12
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 1208 additions and 669 deletions

View File

@ -55,7 +55,7 @@ public class DatafeedJobsRestIT extends ESRestTestCase {
return true;
}
private void setupDataAccessRole(String index) throws IOException {
private static void setupDataAccessRole(String index) throws IOException {
Request request = new Request("PUT", "/_security/role/test_data_access");
request.setJsonEntity("{"
+ " \"indices\" : ["
@ -283,10 +283,12 @@ public class DatafeedJobsRestIT extends ESRestTestCase {
new LookbackOnlyTestHelper("test-lookback-only-with-source-disabled", "airline-data-disabled-source").execute();
}
@AwaitsFix(bugUrl = "This test uses painless which is not available in the integTest phase")
public void testLookbackOnlyWithScriptFields() throws Exception {
new LookbackOnlyTestHelper("test-lookback-only-with-script-fields", "airline-data-disabled-source")
.setAddScriptedFields(true).execute();
new LookbackOnlyTestHelper("test-lookback-only-with-script-fields", "airline-data")
.setScriptedFields(
"{\"scripted_airline\":{\"script\":{\"lang\":\"painless\",\"source\":\"doc['airline.keyword'].value\"}}}")
.setAirlineVariant("scripted_airline")
.execute();
}
public void testLookbackOnlyWithNestedFields() throws Exception {
@ -1088,7 +1090,7 @@ public class DatafeedJobsRestIT extends ESRestTestCase {
private String jobId;
private String airlineVariant;
private String dataIndex;
private boolean addScriptedFields;
private String scriptedFields;
private boolean shouldSucceedInput;
private boolean shouldSucceedProcessing;
@ -1100,8 +1102,8 @@ public class DatafeedJobsRestIT extends ESRestTestCase {
this.airlineVariant = "airline";
}
public LookbackOnlyTestHelper setAddScriptedFields(boolean value) {
addScriptedFields = value;
public LookbackOnlyTestHelper setScriptedFields(String scriptFields) {
this.scriptedFields = scriptFields;
return this;
}
@ -1124,10 +1126,7 @@ public class DatafeedJobsRestIT extends ESRestTestCase {
public void execute() throws Exception {
createJob(jobId, airlineVariant);
String datafeedId = "datafeed-" + jobId;
new DatafeedBuilder(datafeedId, jobId, dataIndex)
.setScriptedFields(addScriptedFields ?
"{\"airline\":{\"script\":{\"lang\":\"painless\",\"inline\":\"doc['airline'].value\"}}}" : null)
.build();
new DatafeedBuilder(datafeedId, jobId, dataIndex).setScriptedFields(scriptedFields).build();
openJob(client(), jobId);
startDatafeedAndWaitUntilStopped(datafeedId);

View File

@ -130,7 +130,7 @@ class ScrollDataExtractor implements DataExtractor {
context.query, context.extractedFields.timeField(), start, context.end));
for (ExtractedField docValueField : context.extractedFields.getDocValueFields()) {
searchRequestBuilder.addDocValueField(docValueField.getName(), docValueField.getDocValueFormat());
searchRequestBuilder.addDocValueField(docValueField.getSearchField(), docValueField.getDocValueFormat());
}
String[] sourceFields = context.extractedFields.getSourceFields();
if (sourceFields.length == 0) {

View File

@ -29,7 +29,7 @@ class SearchHitToJsonProcessor implements Releasable {
public void process(SearchHit hit) throws IOException {
jsonBuilder.startObject();
for (ExtractedField field : fields.getAllFields()) {
writeKeyValue(field.getAlias(), field.value(hit));
writeKeyValue(field.getName(), field.value(hit));
}
jsonBuilder.endObject();
}

View File

@ -14,7 +14,6 @@ import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Set;
@ -42,13 +41,13 @@ public class TimeBasedExtractedFields extends ExtractedFields {
public Long timeFieldValue(SearchHit hit) {
Object[] value = timeField.value(hit);
if (value.length != 1) {
throw new RuntimeException("Time field [" + timeField.getAlias() + "] expected a single value; actual was: "
throw new RuntimeException("Time field [" + timeField.getName() + "] expected a single value; actual was: "
+ Arrays.toString(value));
}
if (value[0] instanceof Long) {
return (Long) value[0];
}
throw new RuntimeException("Time field [" + timeField.getAlias() + "] expected a long value; actual was: " + value[0]);
throw new RuntimeException("Time field [" + timeField.getName() + "] expected a long value; actual was: " + value[0]);
}
public static TimeBasedExtractedFields build(Job job, DatafeedConfig datafeed, FieldCapabilitiesResponse fieldsCapabilities) {
@ -58,20 +57,18 @@ public class TimeBasedExtractedFields extends ExtractedFields {
if (scriptFields.contains(timeField) == false && extractionMethodDetector.isAggregatable(timeField) == false) {
throw new IllegalArgumentException("cannot retrieve time field [" + timeField + "] because it is not aggregatable");
}
ExtractedField timeExtractedField = extractedTimeField(timeField, scriptFields, fieldsCapabilities);
ExtractedField timeExtractedField = extractedTimeField(timeField, scriptFields);
List<String> remainingFields = job.allInputFields().stream().filter(f -> !f.equals(timeField)).collect(Collectors.toList());
List<ExtractedField> allExtractedFields = new ArrayList<>(remainingFields.size() + 1);
allExtractedFields.add(timeExtractedField);
remainingFields.stream().forEach(field -> allExtractedFields.add(extractionMethodDetector.detect(field)));
return new TimeBasedExtractedFields(timeExtractedField, allExtractedFields);
}
private static ExtractedField extractedTimeField(String timeField, Set<String> scriptFields,
FieldCapabilitiesResponse fieldCapabilities) {
if (scriptFields.contains(timeField)) {
return ExtractedField.newTimeField(timeField, Collections.emptySet(), ExtractedField.ExtractionMethod.SCRIPT_FIELD);
}
return ExtractedField.newTimeField(timeField, fieldCapabilities.getField(timeField).keySet(),
ExtractedField.ExtractionMethod.DOC_VALUE);
private static ExtractedField extractedTimeField(String timeField, Set<String> scriptFields) {
ExtractedField.Method method = scriptFields.contains(timeField) ? ExtractedField.Method.SCRIPT_FIELD
: ExtractedField.Method.DOC_VALUE;
return ExtractedFields.newTimeField(timeField, method);
}
}

View File

@ -24,8 +24,8 @@ import org.elasticsearch.search.fetch.StoredFieldsContext;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.xpack.core.ClientHelper;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.DataFrameAnalysis;
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex;
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
import java.io.IOException;
import java.util.ArrayList;
@ -138,7 +138,7 @@ public class DataFrameDataExtractor {
setFetchSource(searchRequestBuilder);
for (ExtractedField docValueField : context.extractedFields.getDocValueFields()) {
searchRequestBuilder.addDocValueField(docValueField.getName(), docValueField.getDocValueFormat());
searchRequestBuilder.addDocValueField(docValueField.getSearchField(), docValueField.getDocValueFormat());
}
return searchRequestBuilder;
@ -231,7 +231,7 @@ public class DataFrameDataExtractor {
}
public List<String> getFieldNames() {
return context.extractedFields.getAllFields().stream().map(ExtractedField::getAlias).collect(Collectors.toList());
return context.extractedFields.getAllFields().stream().map(ExtractedField::getName).collect(Collectors.toList());
}
public DataSummary collectDataSummary() {

View File

@ -11,11 +11,9 @@ import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.BooleanFieldMapper;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest;
@ -24,9 +22,9 @@ import org.elasticsearch.xpack.core.ml.dataframe.analyses.Types;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
import org.elasticsearch.xpack.core.ml.utils.NameResolver;
import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex;
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex;
import java.util.ArrayList;
import java.util.Arrays;
@ -264,13 +262,13 @@ public class ExtractedFieldsDetector {
List<ExtractedField> adjusted = new ArrayList<>(extractedFields.getAllFields().size());
for (ExtractedField field : extractedFields.getAllFields()) {
if (isBoolean(field.getTypes())) {
if (config.getAnalysis().getAllowedCategoricalTypes(field.getAlias()).contains(BooleanFieldMapper.CONTENT_TYPE)) {
if (config.getAnalysis().getAllowedCategoricalTypes(field.getName()).contains(BooleanFieldMapper.CONTENT_TYPE)) {
// We convert boolean field to string if it is a categorical dependent variable
adjusted.add(new BooleanMapper<>(field, Boolean.TRUE.toString(), Boolean.FALSE.toString()));
adjusted.add(ExtractedFields.applyBooleanMapping(field, Boolean.TRUE.toString(), Boolean.FALSE.toString()));
} else {
// We convert boolean fields to integers with values 0, 1 as this is the preferred
// way to consume such features in the analytics process.
adjusted.add(new BooleanMapper<>(field, 1, 0));
adjusted.add(ExtractedFields.applyBooleanMapping(field, 1, 0));
}
} else {
adjusted.add(field);
@ -282,33 +280,4 @@ public class ExtractedFieldsDetector {
private static boolean isBoolean(Set<String> types) {
return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE);
}
/**
* {@link BooleanMapper} makes boolean field behave as a field of different type.
*/
private static final class BooleanMapper<T> extends ExtractedField {
private final T trueValue;
private final T falseValue;
BooleanMapper(ExtractedField field, T trueValue, T falseValue) {
super(field.getAlias(), field.getName(), Collections.singleton(BooleanFieldMapper.CONTENT_TYPE), ExtractionMethod.DOC_VALUE);
this.trueValue = trueValue;
this.falseValue = falseValue;
}
@Override
public Object[] value(SearchHit hit) {
DocumentField keyValue = hit.field(name);
if (keyValue != null) {
return keyValue.getValues().stream().map(v -> Boolean.TRUE.equals(v) ? trueValue : falseValue).toArray();
}
return new Object[0];
}
@Override
public boolean supportsFromSource() {
return false;
}
}
}

View File

@ -0,0 +1,49 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.search.SearchHit;
import java.util.List;
import java.util.Objects;
import java.util.Set;
abstract class AbstractField implements ExtractedField {
private final String name;
private final Set<String> types;
AbstractField(String name, Set<String> types) {
this.name = Objects.requireNonNull(name);
this.types = Objects.requireNonNull(types);
}
@Override
public String getName() {
return name;
}
@Override
public String getSearchField() {
return name;
}
@Override
public Set<String> getTypes() {
return types;
}
protected Object[] getFieldValue(SearchHit hit) {
DocumentField keyValue = hit.field(getSearchField());
if (keyValue != null) {
List<Object> values = keyValue.getValues();
return values.toArray(new Object[0]);
}
return new Object[0];
}
}

View File

@ -0,0 +1,48 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.search.SearchHit;
import java.util.Set;
public class DocValueField extends AbstractField {
public DocValueField(String name, Set<String> types) {
super(name, types);
}
@Override
public Method getMethod() {
return Method.DOC_VALUE;
}
@Override
public Object[] value(SearchHit hit) {
return getFieldValue(hit);
}
@Override
public boolean supportsFromSource() {
return true;
}
@Override
public ExtractedField newFromSource() {
return new SourceField(getSearchField(), getTypes());
}
@Override
public boolean isMultiField() {
return false;
}
@Nullable
public String getDocValueFormat() {
return null;
}
}

View File

@ -5,305 +5,77 @@
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.geometry.Geometry;
import org.elasticsearch.geometry.Point;
import org.elasticsearch.geometry.ShapeType;
import org.elasticsearch.geometry.utils.StandardValidator;
import org.elasticsearch.geometry.utils.WellKnownText;
import org.elasticsearch.search.SearchHit;
import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
/**
* Represents a field to be extracted by the datafeed.
* It encapsulates the extraction logic.
* Describes how to extract an analyzed field
*/
public abstract class ExtractedField {
public interface ExtractedField {
public enum ExtractionMethod {
enum Method {
SOURCE, DOC_VALUE, SCRIPT_FIELD
}
/** The name of the field as configured in the job */
protected final String alias;
/**
* @return The name of the field as expected by the user
*/
String getName();
/** The name of the field we extract */
protected final String name;
/**
* This is the name of the field we should search for.
* In most cases this is the same as {@link #getName()}.
* However, if the field is a non-aggregatable multi-field
* we cannot retrieve it from source. Thus we search for
* its parent instead.
* @return The name of the field that is searched.
*/
String getSearchField();
private final Set<String> types;
/**
* @return The field types
*/
Set<String> getTypes();
private final ExtractionMethod extractionMethod;
/**
* @return The extraction {@link Method}
*/
Method getMethod();
protected ExtractedField(String alias, String name, Set<String> types, ExtractionMethod extractionMethod) {
this.alias = Objects.requireNonNull(alias);
this.name = Objects.requireNonNull(name);
this.types = Objects.requireNonNull(types);
this.extractionMethod = Objects.requireNonNull(extractionMethod);
/**
* Extracts the value from a {@link SearchHit}
* @param hit the search hit
* @return the extracted value
*/
Object[] value(SearchHit hit);
/**
* @return Whether the field can be fetched from source instead
*/
boolean supportsFromSource();
/**
* @return A new extraction field that's fetching from source
*/
ExtractedField newFromSource();
/**
* @return Whether it is a multi-field
*/
boolean isMultiField();
/**
* @return The multi-field parent
*/
default String getParentField() {
throw new UnsupportedOperationException();
}
public String getAlias() {
return alias;
}
public String getName() {
return name;
}
public Set<String> getTypes() {
return types;
}
public ExtractionMethod getExtractionMethod() {
return extractionMethod;
}
public abstract Object[] value(SearchHit hit);
public abstract boolean supportsFromSource();
public String getDocValueFormat() {
return null;
}
public static ExtractedField newTimeField(String name, Set<String> types, ExtractionMethod extractionMethod) {
if (extractionMethod == ExtractionMethod.SOURCE) {
throw new IllegalArgumentException("time field cannot be extracted from source");
}
return new TimeField(name, types, extractionMethod);
}
public static ExtractedField newGeoShapeField(String alias, String name) {
return new GeoShapeField(alias, name, Collections.singleton("geo_shape"));
}
public static ExtractedField newGeoPointField(String alias, String name) {
return new GeoPointField(alias, name, Collections.singleton("geo_point"));
}
public static ExtractedField newField(String name, Set<String> types, ExtractionMethod extractionMethod) {
return newField(name, name, types, extractionMethod);
}
public static ExtractedField newField(String alias, String name, Set<String> types, ExtractionMethod extractionMethod) {
switch (extractionMethod) {
case DOC_VALUE:
case SCRIPT_FIELD:
return new FromFields(alias, name, types, extractionMethod);
case SOURCE:
return new FromSource(alias, name, types);
default:
throw new IllegalArgumentException("Invalid extraction method [" + extractionMethod + "]");
}
}
public ExtractedField newFromSource() {
if (supportsFromSource()) {
return new FromSource(alias, name, types);
}
throw new IllegalStateException("Field (alias [" + alias + "], name [" + name + "]) should be extracted via ["
+ extractionMethod + "] and cannot be extracted from source");
}
private static class FromFields extends ExtractedField {
FromFields(String alias, String name, Set<String> types, ExtractionMethod extractionMethod) {
super(alias, name, types, extractionMethod);
}
@Override
public Object[] value(SearchHit hit) {
DocumentField keyValue = hit.field(name);
if (keyValue != null) {
List<Object> values = keyValue.getValues();
return values.toArray(new Object[0]);
}
return new Object[0];
}
@Override
public boolean supportsFromSource() {
return getExtractionMethod() == ExtractionMethod.DOC_VALUE;
}
}
private static class GeoShapeField extends FromSource {
private static final WellKnownText wkt = new WellKnownText(true, new StandardValidator(true));
GeoShapeField(String alias, String name, Set<String> types) {
super(alias, name, types);
}
@Override
public Object[] value(SearchHit hit) {
Object[] value = super.value(hit);
if (value.length != 1) {
throw new IllegalStateException("Unexpected values for a geo_shape field: " + Arrays.toString(value));
}
if (value[0] instanceof String) {
value[0] = handleString((String) value[0]);
} else if (value[0] instanceof Map<?, ?>) {
@SuppressWarnings("unchecked")
Map<String, Object> geoObject = (Map<String, Object>) value[0];
value[0] = handleObject(geoObject);
} else {
throw new IllegalStateException("Unexpected value type for a geo_shape field: " + value[0].getClass());
}
return value;
}
private String handleString(String geoString) {
try {
if (geoString.startsWith("POINT")) { // Entry is of the form "POINT (-77.03653 38.897676)"
Geometry geometry = wkt.fromWKT(geoString);
if (geometry.type() != ShapeType.POINT) {
throw new IllegalArgumentException("Unexpected non-point geo_shape type: " + geometry.type().name());
}
Point pt = ((Point)geometry);
return pt.getY() + "," + pt.getX();
} else {
throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString);
}
} catch (IOException | ParseException ex) {
throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString);
}
}
private String handleObject(Map<String, Object> geoObject) {
String geoType = (String) geoObject.get("type");
if (geoType != null && "point".equals(geoType.toLowerCase(Locale.ROOT))) {
@SuppressWarnings("unchecked")
List<Double> coordinates = (List<Double>) geoObject.get("coordinates");
if (coordinates == null || coordinates.size() != 2) {
throw new IllegalArgumentException("Invalid coordinates for geo_shape point: " + geoObject);
}
return coordinates.get(1) + "," + coordinates.get(0);
} else {
throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoObject);
}
}
}
private static class GeoPointField extends FromFields {
GeoPointField(String alias, String name, Set<String> types) {
super(alias, name, types, ExtractionMethod.DOC_VALUE);
}
@Override
public Object[] value(SearchHit hit) {
Object[] value = super.value(hit);
if (value.length != 1) {
throw new IllegalStateException("Unexpected values for a geo_point field: " + Arrays.toString(value));
}
if (value[0] instanceof String) {
value[0] = handleString((String) value[0]);
} else {
throw new IllegalStateException("Unexpected value type for a geo_point field: " + value[0].getClass());
}
return value;
}
private String handleString(String geoString) {
if (geoString.contains(",")) { // Entry is of the form "38.897676, -77.03653"
return geoString.replace(" ", "");
} else {
throw new IllegalArgumentException("Unexpected value for a geo_point field: " + geoString);
}
}
@Override
public boolean supportsFromSource() {
return false;
}
}
private static class TimeField extends FromFields {
private static final String EPOCH_MILLIS_FORMAT = "epoch_millis";
TimeField(String name, Set<String> types, ExtractionMethod extractionMethod) {
super(name, name, types, extractionMethod);
}
@Override
public Object[] value(SearchHit hit) {
Object[] value = super.value(hit);
if (value.length != 1) {
return value;
}
if (value[0] instanceof String) { // doc_value field with the epoch_millis format
value[0] = Long.parseLong((String) value[0]);
} else if (value[0] instanceof Long == false) { // pre-6.0 field
throw new IllegalStateException("Unexpected value for a time field: " + value[0].getClass());
}
return value;
}
@Override
public String getDocValueFormat() {
return EPOCH_MILLIS_FORMAT;
}
@Override
public boolean supportsFromSource() {
return false;
}
}
private static class FromSource extends ExtractedField {
private String[] namePath;
FromSource(String alias, String name, Set<String> types) {
super(alias, name, types, ExtractionMethod.SOURCE);
namePath = name.split("\\.");
}
@Override
public Object[] value(SearchHit hit) {
Map<String, Object> source = hit.getSourceAsMap();
int level = 0;
while (source != null && level < namePath.length - 1) {
source = getNextLevel(source, namePath[level]);
level++;
}
if (source != null) {
Object values = source.get(namePath[level]);
if (values != null) {
if (values instanceof List<?>) {
@SuppressWarnings("unchecked")
List<Object> asList = (List<Object>) values;
return asList.toArray(new Object[0]);
} else {
return new Object[]{values};
}
}
}
return new Object[0];
}
@Override
public boolean supportsFromSource() {
return true;
}
@SuppressWarnings("unchecked")
private static Map<String, Object> getNextLevel(Map<String, Object> source, String key) {
Object nextLevel = source.get(key);
if (nextLevel instanceof Map<?, ?>) {
return (Map<String, Object>) source.get(key);
}
return null;
}
/**
* @return The doc_value format
*/
default String getDocValueFormat() {
throw new UnsupportedOperationException();
}
}

View File

@ -7,6 +7,9 @@ package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.index.mapper.BooleanFieldMapper;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.xpack.core.ml.utils.MlStrings;
import java.util.Collection;
@ -22,16 +25,14 @@ import java.util.stream.Collectors;
*/
public class ExtractedFields {
private static final String TEXT = "text";
private final List<ExtractedField> allFields;
private final List<ExtractedField> docValueFields;
private final String[] sourceFields;
public ExtractedFields(List<ExtractedField> allFields) {
this.allFields = Collections.unmodifiableList(allFields);
this.docValueFields = filterFields(ExtractedField.ExtractionMethod.DOC_VALUE, allFields);
this.sourceFields = filterFields(ExtractedField.ExtractionMethod.SOURCE, allFields).stream().map(ExtractedField::getName)
this.docValueFields = filterFields(ExtractedField.Method.DOC_VALUE, allFields);
this.sourceFields = filterFields(ExtractedField.Method.SOURCE, allFields).stream().map(ExtractedField::getSearchField)
.toArray(String[]::new);
}
@ -47,8 +48,8 @@ public class ExtractedFields {
return docValueFields;
}
private static List<ExtractedField> filterFields(ExtractedField.ExtractionMethod method, List<ExtractedField> fields) {
return fields.stream().filter(field -> field.getExtractionMethod() == method).collect(Collectors.toList());
private static List<ExtractedField> filterFields(ExtractedField.Method method, List<ExtractedField> fields) {
return fields.stream().filter(field -> field.getMethod() == method).collect(Collectors.toList());
}
public static ExtractedFields build(Collection<String> allFields, Set<String> scriptFields,
@ -57,6 +58,14 @@ public class ExtractedFields {
return new ExtractedFields(allFields.stream().map(field -> extractionMethodDetector.detect(field)).collect(Collectors.toList()));
}
public static TimeField newTimeField(String name, ExtractedField.Method method) {
return new TimeField(name, method);
}
public static <T> ExtractedField applyBooleanMapping(ExtractedField field, T trueValue, T falseValue) {
return new BooleanMapper<>(field, trueValue, falseValue);
}
public static class ExtractionMethodDetector {
private final Set<String> scriptFields;
@ -68,38 +77,37 @@ public class ExtractedFields {
}
public ExtractedField detect(String field) {
String internalField = field;
ExtractedField.ExtractionMethod method = ExtractedField.ExtractionMethod.SOURCE;
Set<String> types = getTypes(field);
if (scriptFields.contains(field)) {
method = ExtractedField.ExtractionMethod.SCRIPT_FIELD;
} else if (isAggregatable(field)) {
method = ExtractedField.ExtractionMethod.DOC_VALUE;
if (isFieldOfType(field, "date")) {
return ExtractedField.newTimeField(field, types, method);
}
} else if (isFieldOfType(field, TEXT)) {
String parentField = MlStrings.getParentField(field);
// Field is text so check if it is a multi-field
if (Objects.equals(parentField, field) == false && fieldsCapabilities.getField(parentField) != null) {
// Field is a multi-field which means it won't be available in source. Let's take the parent instead.
internalField = parentField;
method = isAggregatable(parentField) ? ExtractedField.ExtractionMethod.DOC_VALUE
: ExtractedField.ExtractionMethod.SOURCE;
return new ScriptField(field);
}
ExtractedField extractedField = detectNonScriptField(field);
String parentField = MlStrings.getParentField(field);
if (isMultiField(field, parentField)) {
if (isAggregatable(field)) {
return new MultiField(parentField, extractedField);
} else {
ExtractedField parentExtractionField = detectNonScriptField(parentField);
return new MultiField(field, parentField, parentField, parentExtractionField);
}
}
return extractedField;
}
if (isFieldOfType(field, "geo_point")) {
if (method != ExtractedField.ExtractionMethod.DOC_VALUE) {
private ExtractedField detectNonScriptField(String field) {
if (isFieldOfType(field, TimeField.TYPE) && isAggregatable(field)) {
return new TimeField(field, ExtractedField.Method.DOC_VALUE);
}
if (isFieldOfType(field, GeoPointField.TYPE)) {
if (isAggregatable(field) == false) {
throw new IllegalArgumentException("cannot use [geo_point] field with disabled doc values");
}
return ExtractedField.newGeoPointField(field, internalField);
return new GeoPointField(field);
}
if (isFieldOfType(field, "geo_shape")) {
return ExtractedField.newGeoShapeField(field, internalField);
if (isFieldOfType(field, GeoShapeField.TYPE)) {
return new GeoShapeField(field);
}
return ExtractedField.newField(field, internalField, types, method);
Set<String> types = getTypes(field);
return isAggregatable(field) ? new DocValueField(field, types) : new SourceField(field, types);
}
private Set<String> getTypes(String field) {
@ -127,5 +135,57 @@ public class ExtractedFields {
}
return false;
}
private boolean isMultiField(String field, String parent) {
if (Objects.equals(field, parent)) {
return false;
}
Map<String, FieldCapabilities> parentFieldCaps = fieldsCapabilities.getField(parent);
if (parentFieldCaps == null || (parentFieldCaps.size() == 1 && parentFieldCaps.containsKey("object"))) {
// We check if the parent is an object which is indicated by field caps containing an "object" entry.
// If an object, it's not a multi field
return false;
}
return true;
}
}
/**
* Makes boolean fields behave as a field of different type.
*/
private static final class BooleanMapper<T> extends DocValueField {
private static final Set<String> TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE);
private final T trueValue;
private final T falseValue;
BooleanMapper(ExtractedField field, T trueValue, T falseValue) {
super(field.getName(), TYPES);
if (field.getMethod() != Method.DOC_VALUE || field.getTypes().contains(BooleanFieldMapper.CONTENT_TYPE) == false) {
throw new IllegalArgumentException("cannot apply boolean mapping to field [" + field.getName() + "]");
}
this.trueValue = trueValue;
this.falseValue = falseValue;
}
@Override
public Object[] value(SearchHit hit) {
DocumentField keyValue = hit.field(getName());
if (keyValue != null) {
return keyValue.getValues().stream().map(v -> Boolean.TRUE.equals(v) ? trueValue : falseValue).toArray();
}
return new Object[0];
}
@Override
public boolean supportsFromSource() {
return false;
}
@Override
public ExtractedField newFromSource() {
throw new UnsupportedOperationException();
}
}
}

View File

@ -0,0 +1,55 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
public class GeoPointField extends DocValueField {
static final String TYPE = "geo_point";
private static final Set<String> TYPES = Collections.singleton(TYPE);
public GeoPointField(String name) {
super(name, TYPES);
}
@Override
public Object[] value(SearchHit hit) {
Object[] value = super.value(hit);
if (value.length != 1) {
throw new IllegalStateException("Unexpected values for a geo_point field: " + Arrays.toString(value));
}
if (value[0] instanceof String) {
value[0] = handleString((String) value[0]);
} else {
throw new IllegalStateException("Unexpected value type for a geo_point field: " + value[0].getClass());
}
return value;
}
private String handleString(String geoString) {
if (geoString.contains(",")) { // Entry is of the form "38.897676, -77.03653"
return geoString.replace(" ", "");
} else {
throw new IllegalArgumentException("Unexpected value for a geo_point field: " + geoString);
}
}
@Override
public boolean supportsFromSource() {
return false;
}
@Override
public ExtractedField newFromSource() {
throw new UnsupportedOperationException();
}
}

View File

@ -0,0 +1,84 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.geometry.Geometry;
import org.elasticsearch.geometry.Point;
import org.elasticsearch.geometry.ShapeType;
import org.elasticsearch.geometry.utils.StandardValidator;
import org.elasticsearch.geometry.utils.WellKnownText;
import org.elasticsearch.search.SearchHit;
import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
public class GeoShapeField extends SourceField {
static final String TYPE = "geo_shape";
private static final Set<String> TYPES = Collections.singleton(TYPE);
private static final WellKnownText wkt = new WellKnownText(true, new StandardValidator(true));
public GeoShapeField(String name) {
super(name, TYPES);
}
@Override
public Object[] value(SearchHit hit) {
Object[] value = super.value(hit);
if (value.length != 1) {
throw new IllegalStateException("Unexpected values for a geo_shape field: " + Arrays.toString(value));
}
if (value[0] instanceof String) {
value[0] = handleString((String) value[0]);
} else if (value[0] instanceof Map<?, ?>) {
@SuppressWarnings("unchecked")
Map<String, Object> geoObject = (Map<String, Object>) value[0];
value[0] = handleObject(geoObject);
} else {
throw new IllegalStateException("Unexpected value type for a geo_shape field: " + value[0].getClass());
}
return value;
}
private String handleString(String geoString) {
try {
if (geoString.startsWith("POINT")) { // Entry is of the form "POINT (-77.03653 38.897676)"
Geometry geometry = wkt.fromWKT(geoString);
if (geometry.type() != ShapeType.POINT) {
throw new IllegalArgumentException("Unexpected non-point geo_shape type: " + geometry.type().name());
}
Point pt = ((Point)geometry);
return pt.getY() + "," + pt.getX();
} else {
throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString);
}
} catch (IOException | ParseException ex) {
throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoString);
}
}
private String handleObject(Map<String, Object> geoObject) {
String geoType = (String) geoObject.get("type");
if (geoType != null && "point".equals(geoType.toLowerCase(Locale.ROOT))) {
@SuppressWarnings("unchecked")
List<Double> coordinates = (List<Double>) geoObject.get("coordinates");
if (coordinates == null || coordinates.size() != 2) {
throw new IllegalArgumentException("Invalid coordinates for geo_shape point: " + geoObject);
}
return coordinates.get(1) + "," + coordinates.get(0);
} else {
throw new IllegalArgumentException("Unexpected value for a geo_shape field: " + geoObject);
}
}
}

View File

@ -0,0 +1,80 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import java.util.Objects;
import java.util.Set;
public class MultiField implements ExtractedField {
private final String name;
private final String searchField;
private final ExtractedField field;
private final String parent;
MultiField(String parent, ExtractedField field) {
this(field.getName(), field.getSearchField(), parent, field);
}
MultiField(String name, String searchField, String parent, ExtractedField field) {
this.name = Objects.requireNonNull(name);
this.searchField = Objects.requireNonNull(searchField);
this.field = Objects.requireNonNull(field);
this.parent = Objects.requireNonNull(parent);
}
@Override
public String getName() {
return name;
}
@Override
public String getSearchField() {
return searchField;
}
@Override
public Set<String> getTypes() {
return field.getTypes();
}
@Override
public Method getMethod() {
return field.getMethod();
}
@Override
public Object[] value(SearchHit hit) {
return field.value(hit);
}
@Override
public boolean supportsFromSource() {
return false;
}
@Override
public ExtractedField newFromSource() {
throw new UnsupportedOperationException();
}
@Override
public boolean isMultiField() {
return true;
}
@Override
public String getParentField() {
return parent;
}
@Override
public String getDocValueFormat() {
return field.getDocValueFormat();
}
}

View File

@ -0,0 +1,42 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import java.util.Collections;
public class ScriptField extends AbstractField {
public ScriptField(String name) {
super(name, Collections.emptySet());
}
@Override
public Method getMethod() {
return Method.SCRIPT_FIELD;
}
@Override
public Object[] value(SearchHit hit) {
return getFieldValue(hit);
}
@Override
public boolean supportsFromSource() {
return false;
}
@Override
public ExtractedField newFromSource() {
throw new UnsupportedOperationException();
}
@Override
public boolean isMultiField() {
return false;
}
}

View File

@ -0,0 +1,74 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class SourceField extends AbstractField {
private final String[] path;
public SourceField(String name, Set<String> types) {
super(name, types);
path = name.split("\\.");
}
@Override
public Method getMethod() {
return Method.SOURCE;
}
@Override
public Object[] value(SearchHit hit) {
Map<String, Object> source = hit.getSourceAsMap();
int level = 0;
while (source != null && level < path.length - 1) {
source = getNextLevel(source, path[level]);
level++;
}
if (source != null) {
Object values = source.get(path[level]);
if (values != null) {
if (values instanceof List<?>) {
@SuppressWarnings("unchecked")
List<Object> asList = (List<Object>) values;
return asList.toArray(new Object[0]);
} else {
return new Object[]{values};
}
}
}
return new Object[0];
}
@SuppressWarnings("unchecked")
private static Map<String, Object> getNextLevel(Map<String, Object> source, String key) {
Object nextLevel = source.get(key);
if (nextLevel instanceof Map<?, ?>) {
return (Map<String, Object>) source.get(key);
}
return null;
}
@Override
public boolean supportsFromSource() {
return true;
}
@Override
public ExtractedField newFromSource() {
return this;
}
@Override
public boolean isMultiField() {
return false;
}
}

View File

@ -0,0 +1,73 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import java.util.Collections;
import java.util.Objects;
import java.util.Set;
public class TimeField extends AbstractField {
static final String TYPE = "date";
private static final Set<String> TYPES = Collections.singleton(TYPE);
private static final String EPOCH_MILLIS_FORMAT = "epoch_millis";
private final Method method;
public TimeField(String name, Method method) {
super(name, TYPES);
if (method == Method.SOURCE) {
throw new IllegalArgumentException("time field [" + name + "] cannot be extracted from source");
}
this.method = Objects.requireNonNull(method);
}
@Override
public Method getMethod() {
return method;
}
@Override
public Object[] value(SearchHit hit) {
Object[] value = getFieldValue(hit);
if (value.length != 1) {
return value;
}
if (value[0] instanceof String) { // doc_value field with the epoch_millis format
value[0] = Long.parseLong((String) value[0]);
} else if (value[0] instanceof Long == false) { // pre-6.0 field
throw new IllegalStateException("Unexpected value for a time field: " + value[0].getClass());
}
return value;
}
@Override
public String getDocValueFormat() {
if (method != Method.DOC_VALUE) {
throw new UnsupportedOperationException();
}
return EPOCH_MILLIS_FORMAT;
}
@Override
public boolean supportsFromSource() {
return false;
}
@Override
public ExtractedField newFromSource() {
throw new UnsupportedOperationException();
}
@Override
public boolean isMultiField() {
return false;
}
}

View File

@ -32,7 +32,9 @@ import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedTimingStats;
import org.elasticsearch.xpack.ml.datafeed.DatafeedTimingStatsReporter;
import org.elasticsearch.xpack.ml.datafeed.DatafeedTimingStatsReporter.DatafeedTimingStatsPersister;
import org.elasticsearch.xpack.ml.extractor.DocValueField;
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
import org.elasticsearch.xpack.ml.extractor.TimeField;
import org.junit.Before;
import org.mockito.ArgumentCaptor;
@ -134,11 +136,9 @@ public class ScrollDataExtractorTests extends ESTestCase {
capturedSearchRequests = new ArrayList<>();
capturedContinueScrollIds = new ArrayList<>();
jobId = "test-job";
ExtractedField timeField = ExtractedField.newField("time", Collections.singleton("date"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE);
extractedFields = new TimeBasedExtractedFields(timeField,
Arrays.asList(timeField, ExtractedField.newField("field_1", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE)));
Arrays.asList(timeField, new DocValueField("field_1", Collections.singleton("keyword"))));
indices = Arrays.asList("index-1", "index-2");
query = QueryBuilders.matchAllQuery();
scriptFields = Collections.emptyList();

View File

@ -7,8 +7,10 @@ package org.elasticsearch.xpack.ml.datafeed.extractor.scroll;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.extractor.DocValueField;
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import org.elasticsearch.xpack.ml.extractor.TimeField;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import java.io.ByteArrayOutputStream;
@ -22,14 +24,10 @@ import static org.hamcrest.Matchers.equalTo;
public class SearchHitToJsonProcessorTests extends ESTestCase {
public void testProcessGivenSingleHit() throws IOException {
ExtractedField timeField = ExtractedField.newField("time", Collections.singleton("date"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField missingField = ExtractedField.newField("missing", Collections.singleton("float"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField singleField = ExtractedField.newField("single", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField arrayField = ExtractedField.newField("array", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE);
ExtractedField missingField = new DocValueField("missing", Collections.singleton("float"));
ExtractedField singleField = new DocValueField("single", Collections.singleton("keyword"));
ExtractedField arrayField = new DocValueField("array", Collections.singleton("keyword"));
TimeBasedExtractedFields extractedFields = new TimeBasedExtractedFields(timeField,
Arrays.asList(timeField, missingField, singleField, arrayField));
@ -45,14 +43,10 @@ public class SearchHitToJsonProcessorTests extends ESTestCase {
}
public void testProcessGivenMultipleHits() throws IOException {
ExtractedField timeField = ExtractedField.newField("time", Collections.singleton("date"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField missingField = ExtractedField.newField("missing", Collections.singleton("float"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField singleField = ExtractedField.newField("single", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField arrayField = ExtractedField.newField("array", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE);
ExtractedField missingField = new DocValueField("missing", Collections.singleton("float"));
ExtractedField singleField = new DocValueField("single", Collections.singleton("keyword"));
ExtractedField arrayField = new DocValueField("array", Collections.singleton("keyword"));
TimeBasedExtractedFields extractedFields = new TimeBasedExtractedFields(timeField,
Arrays.asList(timeField, missingField, singleField, arrayField));

View File

@ -15,7 +15,12 @@ import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.Detector;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.ml.extractor.DocValueField;
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import org.elasticsearch.xpack.ml.extractor.ScriptField;
import org.elasticsearch.xpack.ml.extractor.SourceField;
import org.elasticsearch.xpack.ml.extractor.TimeField;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import java.util.Arrays;
@ -30,8 +35,7 @@ import static org.mockito.Mockito.when;
public class TimeBasedExtractedFieldsTests extends ESTestCase {
private ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"),
ExtractedField.ExtractionMethod.DOC_VALUE);
private ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE);
public void testInvalidConstruction() {
expectThrows(IllegalArgumentException.class, () -> new TimeBasedExtractedFields(timeField, Collections.emptyList()));
@ -48,18 +52,12 @@ public class TimeBasedExtractedFieldsTests extends ESTestCase {
}
public void testAllTypesOfFields() {
ExtractedField docValue1 = ExtractedField.newField("doc1", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField docValue2 = ExtractedField.newField("doc2", Collections.singleton("float"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField scriptField1 = ExtractedField.newField("scripted1", Collections.emptySet(),
ExtractedField.ExtractionMethod.SCRIPT_FIELD);
ExtractedField scriptField2 = ExtractedField.newField("scripted2", Collections.emptySet(),
ExtractedField.ExtractionMethod.SCRIPT_FIELD);
ExtractedField sourceField1 = ExtractedField.newField("src1", Collections.singleton("text"),
ExtractedField.ExtractionMethod.SOURCE);
ExtractedField sourceField2 = ExtractedField.newField("src2", Collections.singleton("text"),
ExtractedField.ExtractionMethod.SOURCE);
ExtractedField docValue1 = new DocValueField("doc1", Collections.singleton("keyword"));
ExtractedField docValue2 = new DocValueField("doc2", Collections.singleton("float"));
ExtractedField scriptField1 = new ScriptField("scripted1");
ExtractedField scriptField2 = new ScriptField("scripted2");
ExtractedField sourceField1 = new SourceField("src1", Collections.singleton("text"));
ExtractedField sourceField2 = new SourceField("src2", Collections.singleton("text"));
TimeBasedExtractedFields extractedFields = new TimeBasedExtractedFields(timeField, Arrays.asList(timeField,
docValue1, docValue2, scriptField1, scriptField2, sourceField1, sourceField2));
@ -182,12 +180,9 @@ public class TimeBasedExtractedFieldsTests extends ESTestCase {
assertThat(extractedFields.getSourceFields()[0], equalTo("airline"));
assertThat(extractedFields.getAllFields().size(), equalTo(3));
assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("time")).findFirst().get().getAlias(),
equalTo("time"));
assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airport.keyword")).findFirst().get().getAlias(),
equalTo("airport.keyword"));
assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airline")).findFirst().get().getAlias(),
equalTo("airline.text"));
assertThat(findField("time", extractedFields).getSearchField(), equalTo("time"));
assertThat(findField("airport.keyword", extractedFields).getSearchField(), equalTo("airport.keyword"));
assertThat(findField("airline.text", extractedFields).getSearchField(), equalTo("airline"));
}
public void testBuildGivenTimeFieldIsNotAggregatable() {
@ -253,4 +248,8 @@ public class TimeBasedExtractedFieldsTests extends ESTestCase {
when(fieldCaps.isAggregatable()).thenReturn(isAggregatable);
return fieldCaps;
}
private static ExtractedField findField(String name, ExtractedFields fields) {
return fields.getAllFields().stream().filter(f -> f.getName().equals(name)).findFirst().get();
}
}

View File

@ -27,8 +27,9 @@ import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetectionTests;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression;
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
import org.elasticsearch.xpack.ml.extractor.DocValueField;
import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import org.elasticsearch.xpack.ml.extractor.SourceField;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import org.junit.Before;
import org.mockito.ArgumentCaptor;
@ -78,8 +79,8 @@ public class DataFrameDataExtractorTests extends ESTestCase {
indices = Arrays.asList("index-1", "index-2");
query = QueryBuilders.matchAllQuery();
extractedFields = new ExtractedFields(Arrays.asList(
ExtractedField.newField("field_1", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE),
ExtractedField.newField("field_2", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE)));
new DocValueField("field_1", Collections.singleton("keyword")),
new DocValueField("field_2", Collections.singleton("keyword"))));
scrollSize = 1000;
headers = Collections.emptyMap();
@ -295,8 +296,8 @@ public class DataFrameDataExtractorTests extends ESTestCase {
public void testIncludeSourceIsFalseAndAtLeastOneSourceField() throws IOException {
extractedFields = new ExtractedFields(Arrays.asList(
ExtractedField.newField("field_1", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE),
ExtractedField.newField("field_2", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE)));
new DocValueField("field_1", Collections.singleton("keyword")),
new SourceField("field_2", Collections.singleton("text"))));
TestExtractor dataExtractor = createExtractor(false, false);
@ -391,15 +392,15 @@ public class DataFrameDataExtractorTests extends ESTestCase {
public void testGetCategoricalFields() {
extractedFields = new ExtractedFields(Arrays.asList(
ExtractedField.newField("field_boolean", Collections.singleton("boolean"), ExtractedField.ExtractionMethod.DOC_VALUE),
ExtractedField.newField("field_float", Collections.singleton("float"), ExtractedField.ExtractionMethod.DOC_VALUE),
ExtractedField.newField("field_double", Collections.singleton("double"), ExtractedField.ExtractionMethod.DOC_VALUE),
ExtractedField.newField("field_byte", Collections.singleton("byte"), ExtractedField.ExtractionMethod.DOC_VALUE),
ExtractedField.newField("field_short", Collections.singleton("short"), ExtractedField.ExtractionMethod.DOC_VALUE),
ExtractedField.newField("field_integer", Collections.singleton("integer"), ExtractedField.ExtractionMethod.DOC_VALUE),
ExtractedField.newField("field_long", Collections.singleton("long"), ExtractedField.ExtractionMethod.DOC_VALUE),
ExtractedField.newField("field_keyword", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE),
ExtractedField.newField("field_text", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE)));
new DocValueField("field_boolean", Collections.singleton("boolean")),
new DocValueField("field_float", Collections.singleton("float")),
new DocValueField("field_double", Collections.singleton("double")),
new DocValueField("field_byte", Collections.singleton("byte")),
new DocValueField("field_short", Collections.singleton("short")),
new DocValueField("field_integer", Collections.singleton("integer")),
new DocValueField("field_long", Collections.singleton("long")),
new DocValueField("field_keyword", Collections.singleton("keyword")),
new SourceField("field_text", Collections.singleton("text"))));
TestExtractor dataExtractor = createExtractor(true, true);
assertThat(dataExtractor.getCategoricalFields(OutlierDetectionTests.createRandom()), empty());

View File

@ -53,7 +53,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
List<ExtractedField> allFields = extractedFields.getAllFields();
assertThat(allFields.size(), equalTo(1));
assertThat(allFields.get(0).getName(), equalTo("some_float"));
assertThat(allFields.get(0).getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE));
assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
}
public void testDetect_GivenNumericFieldWithMultipleTypes() {
@ -68,7 +68,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
List<ExtractedField> allFields = extractedFields.getAllFields();
assertThat(allFields.size(), equalTo(1));
assertThat(allFields.get(0).getName(), equalTo("some_number"));
assertThat(allFields.get(0).getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE));
assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
}
public void testDetect_GivenOutlierDetectionAndNonNumericField() {
@ -111,8 +111,8 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
assertThat(allFields.size(), equalTo(3));
assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()),
containsInAnyOrder("some_float", "some_long", "some_boolean"));
assertThat(allFields.stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)));
assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.Method.DOC_VALUE)));
}
public void testDetect_GivenRegressionAndMultipleFields() {
@ -132,8 +132,8 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
assertThat(allFields.size(), equalTo(5));
assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()),
containsInAnyOrder("foo", "some_float", "some_keyword", "some_long", "some_boolean"));
assertThat(allFields.stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)));
assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.Method.DOC_VALUE)));
}
public void testDetect_GivenRegressionAndRequiredFieldMissing() {
@ -442,8 +442,8 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)));
assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.Method.DOC_VALUE)));
}
public void testDetect_GivenEqualFieldsToDocValuesLimit() {
@ -461,8 +461,8 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.ExtractionMethod.DOC_VALUE)));
assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.Method.DOC_VALUE)));
}
public void testDetect_GivenMoreFieldsThanDocValuesLimit() {
@ -480,8 +480,8 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getExtractionMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.ExtractionMethod.SOURCE)));
assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.Method.SOURCE)));
}
public void testDetect_GivenBooleanField_BooleanMappedAsInteger() {
@ -497,7 +497,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
assertThat(allFields.size(), equalTo(1));
ExtractedField booleanField = allFields.get(0);
assertThat(booleanField.getTypes(), contains("boolean"));
assertThat(booleanField.getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE));
assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
assertThat(booleanField.value(hit), arrayContaining(1));
@ -522,7 +522,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
assertThat(allFields.size(), equalTo(1));
ExtractedField booleanField = allFields.get(0);
assertThat(booleanField.getTypes(), contains("boolean"));
assertThat(booleanField.getExtractionMethod(), equalTo(ExtractedField.ExtractionMethod.DOC_VALUE));
assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
assertThat(booleanField.value(hit), arrayContaining("true"));

View File

@ -0,0 +1,79 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import java.util.Arrays;
import java.util.Collections;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
public class DocValueFieldTests extends ESTestCase {
public void testKeyword() {
SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build();
ExtractedField field = new DocValueField("a_keyword", Collections.singleton("keyword"));
assertThat(field.value(hit), equalTo(new String[] { "bar" }));
assertThat(field.getName(), equalTo("a_keyword"));
assertThat(field.getSearchField(), equalTo("a_keyword"));
assertThat(field.getTypes(), contains("keyword"));
assertThat(field.getDocValueFormat(), is(nullValue()));
assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
assertThat(field.supportsFromSource(), is(true));
assertThat(field.isMultiField(), is(false));
expectThrows(UnsupportedOperationException.class, () -> field.getParentField());
}
public void testKeywordArray() {
SearchHit hit = new SearchHitBuilder(42).addField("array", Arrays.asList("a", "b")).build();
ExtractedField field = new DocValueField("array", Collections.singleton("keyword"));
assertThat(field.value(hit), equalTo(new String[] { "a", "b" }));
assertThat(field.getName(), equalTo("array"));
assertThat(field.getSearchField(), equalTo("array"));
assertThat(field.getTypes(), contains("keyword"));
assertThat(field.getDocValueFormat(), is(nullValue()));
assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
assertThat(field.supportsFromSource(), is(true));
assertThat(field.isMultiField(), is(false));
expectThrows(UnsupportedOperationException.class, () -> field.getParentField());
ExtractedField missing = new DocValueField("missing", Collections.singleton("keyword"));
assertThat(missing.value(hit), equalTo(new Object[0]));
}
public void testMissing() {
SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build();
ExtractedField missing = new DocValueField("missing", Collections.singleton("keyword"));
assertThat(missing.value(hit), equalTo(new Object[0]));
}
public void testNewFromSource() {
ExtractedField field = new DocValueField("foo", Collections.singleton("keyword"));
ExtractedField fromSource = field.newFromSource();
assertThat(fromSource.getName(), equalTo("foo"));
assertThat(fromSource.getSearchField(), equalTo("foo"));
assertThat(fromSource.getTypes(), contains("keyword"));
expectThrows(UnsupportedOperationException.class, () -> fromSource.getDocValueFormat());
assertThat(fromSource.getMethod(), equalTo(ExtractedField.Method.SOURCE));
assertThat(fromSource.supportsFromSource(), is(true));
assertThat(fromSource.isMultiField(), is(false));
expectThrows(UnsupportedOperationException.class, () -> fromSource.getParentField());
}
}

View File

@ -1,194 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import java.util.Arrays;
import java.util.Collections;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.startsWith;
public class ExtractedFieldTests extends ESTestCase {
public void testValueGivenDocValue() {
SearchHit hit = new SearchHitBuilder(42).addField("single", "bar").addField("array", Arrays.asList("a", "b")).build();
ExtractedField single = ExtractedField.newField("single", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(single.value(hit), equalTo(new String[] { "bar" }));
ExtractedField array = ExtractedField.newField("array", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(array.value(hit), equalTo(new String[] { "a", "b" }));
ExtractedField missing = ExtractedField.newField("missing",Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(missing.value(hit), equalTo(new Object[0]));
}
public void testValueGivenScriptField() {
SearchHit hit = new SearchHitBuilder(42).addField("single", "bar").addField("array", Arrays.asList("a", "b")).build();
ExtractedField single = ExtractedField.newField("single",Collections.emptySet(),
ExtractedField.ExtractionMethod.SCRIPT_FIELD);
assertThat(single.value(hit), equalTo(new String[] { "bar" }));
ExtractedField array = ExtractedField.newField("array", Collections.emptySet(), ExtractedField.ExtractionMethod.SCRIPT_FIELD);
assertThat(array.value(hit), equalTo(new String[] { "a", "b" }));
ExtractedField missing = ExtractedField.newField("missing", Collections.emptySet(), ExtractedField.ExtractionMethod.SCRIPT_FIELD);
assertThat(missing.value(hit), equalTo(new Object[0]));
}
public void testValueGivenSource() {
SearchHit hit = new SearchHitBuilder(42).setSource("{\"single\":\"bar\",\"array\":[\"a\",\"b\"]}").build();
ExtractedField single = ExtractedField.newField("single", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE);
assertThat(single.value(hit), equalTo(new String[] { "bar" }));
ExtractedField array = ExtractedField.newField("array", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE);
assertThat(array.value(hit), equalTo(new String[] { "a", "b" }));
ExtractedField missing = ExtractedField.newField("missing", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE);
assertThat(missing.value(hit), equalTo(new Object[0]));
}
public void testValueGivenNestedSource() {
SearchHit hit = new SearchHitBuilder(42).setSource("{\"level_1\":{\"level_2\":{\"foo\":\"bar\"}}}").build();
ExtractedField nested = ExtractedField.newField("alias", "level_1.level_2.foo", Collections.singleton("text"),
ExtractedField.ExtractionMethod.SOURCE);
assertThat(nested.value(hit), equalTo(new String[] { "bar" }));
}
public void testGeoPoint() {
double lat = 38.897676;
double lon = -77.03653;
String[] expected = new String[] {lat + "," + lon};
// doc_value field
ExtractedField geo = ExtractedField.newGeoPointField("geo", "geo");
SearchHit hit = new SearchHitBuilder(42).addField("geo", lat + ", " + lon).build();
assertThat(geo.value(hit), equalTo(expected));
}
public void testGeoShape() {
double lat = 38.897676;
double lon = -77.03653;
String[] expected = new String[] {lat + "," + lon};
// object format
SearchHit hit = new SearchHitBuilder(42)
.setSource("{\"geo\":{\"type\":\"point\", \"coordinates\": [" + lon + ", " + lat + "]}}")
.build();
ExtractedField geo = ExtractedField.newGeoShapeField("geo", "geo");
assertThat(geo.value(hit), equalTo(expected));
// WKT format
hit = new SearchHitBuilder(42).setSource("{\"geo\":\"POINT ("+ lon + " " + lat + ")\"}").build();
geo = ExtractedField.newGeoShapeField("geo", "geo");
assertThat(geo.value(hit), equalTo(expected));
}
public void testValueGivenSourceAndHitWithNoSource() {
ExtractedField missing = ExtractedField.newField("missing", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE);
assertThat(missing.value(new SearchHitBuilder(3).build()), equalTo(new Object[0]));
}
public void testValueGivenMismatchingMethod() {
SearchHit hit = new SearchHitBuilder(42).addField("a", 1).setSource("{\"b\":2}").build();
ExtractedField invalidA = ExtractedField.newField("a", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE);
assertThat(invalidA.value(hit), equalTo(new Object[0]));
ExtractedField validA = ExtractedField.newField("a", Collections.singleton("keyword"), ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(validA.value(hit), equalTo(new Integer[] { 1 }));
ExtractedField invalidB = ExtractedField.newField("b", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(invalidB.value(hit), equalTo(new Object[0]));
ExtractedField validB = ExtractedField.newField("b", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE);
assertThat(validB.value(hit), equalTo(new Integer[] { 2 }));
}
public void testValueGivenEmptyHit() {
SearchHit hit = new SearchHitBuilder(42).build();
ExtractedField docValue = ExtractedField.newField("a", Collections.singleton("text"), ExtractedField.ExtractionMethod.SOURCE);
assertThat(docValue.value(hit), equalTo(new Object[0]));
ExtractedField sourceField = ExtractedField.newField("b", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(sourceField.value(hit), equalTo(new Object[0]));
}
public void testNewTimeFieldGivenSource() {
expectThrows(IllegalArgumentException.class, () -> ExtractedField.newTimeField("time", Collections.singleton("date"),
ExtractedField.ExtractionMethod.SOURCE));
}
public void testValueGivenStringTimeField() {
final long millis = randomLong();
final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", Long.toString(millis)).build();
final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"),
ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(timeField.value(hit), equalTo(new Object[] { millis }));
}
public void testValueGivenLongTimeField() {
final long millis = randomLong();
final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", millis).build();
final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"),
ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(timeField.value(hit), equalTo(new Object[] { millis }));
}
public void testValueGivenPre6xTimeField() {
// Prior to 6.x, timestamps were simply `long` milliseconds-past-the-epoch values
final long millis = randomLong();
final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", millis).build();
final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"),
ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(timeField.value(hit), equalTo(new Object[] { millis }));
}
public void testValueGivenUnknownFormatTimeField() {
final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", new Object()).build();
final ExtractedField timeField = ExtractedField.newTimeField("time", Collections.singleton("date"),
ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(expectThrows(IllegalStateException.class, () -> timeField.value(hit)).getMessage(),
startsWith("Unexpected value for a time field"));
}
public void testAliasVersusName() {
SearchHit hit = new SearchHitBuilder(42).addField("a", 1).addField("b", 2).build();
ExtractedField field = ExtractedField.newField("a", "a", Collections.singleton("int"),
ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(field.getAlias(), equalTo("a"));
assertThat(field.getName(), equalTo("a"));
assertThat(field.value(hit), equalTo(new Integer[] { 1 }));
hit = new SearchHitBuilder(42).addField("a", 1).addField("b", 2).build();
field = ExtractedField.newField("a", "b", Collections.singleton("int"), ExtractedField.ExtractionMethod.DOC_VALUE);
assertThat(field.getAlias(), equalTo("a"));
assertThat(field.getName(), equalTo("b"));
assertThat(field.value(hit), equalTo(new Integer[] { 2 }));
}
public void testGetDocValueFormat() {
for (ExtractedField.ExtractionMethod method : ExtractedField.ExtractionMethod.values()) {
assertThat(ExtractedField.newField("f", Collections.emptySet(), method).getDocValueFormat(), equalTo(null));
}
assertThat(ExtractedField.newTimeField("doc_value_time", Collections.singleton("date"),
ExtractedField.ExtractionMethod.DOC_VALUE).getDocValueFormat(), equalTo("epoch_millis"));
assertThat(ExtractedField.newTimeField("source_time", Collections.emptySet(),
ExtractedField.ExtractionMethod.SCRIPT_FIELD).getDocValueFormat(), equalTo("epoch_millis"));
}
}

View File

@ -7,12 +7,9 @@ package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.Detector;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import java.util.Arrays;
import java.util.Collections;
@ -21,24 +18,19 @@ import java.util.HashSet;
import java.util.Map;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
public class ExtractedFieldsTests extends ESTestCase {
public void testAllTypesOfFields() {
ExtractedField docValue1 = ExtractedField.newField("doc1", Collections.singleton("keyword"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField docValue2 = ExtractedField.newField("doc2", Collections.singleton("ip"),
ExtractedField.ExtractionMethod.DOC_VALUE);
ExtractedField scriptField1 = ExtractedField.newField("scripted1", Collections.emptySet(),
ExtractedField.ExtractionMethod.SCRIPT_FIELD);
ExtractedField scriptField2 = ExtractedField.newField("scripted2", Collections.emptySet(),
ExtractedField.ExtractionMethod.SCRIPT_FIELD);
ExtractedField sourceField1 = ExtractedField.newField("src1", Collections.singleton("text"),
ExtractedField.ExtractionMethod.SOURCE);
ExtractedField sourceField2 = ExtractedField.newField("src2", Collections.singleton("text"),
ExtractedField.ExtractionMethod.SOURCE);
ExtractedField docValue1 = new DocValueField("doc1", Collections.singleton("keyword"));
ExtractedField docValue2 = new DocValueField("doc2", Collections.singleton("ip"));
ExtractedField scriptField1 = new ScriptField("scripted1");
ExtractedField scriptField2 = new ScriptField("scripted2");
ExtractedField sourceField1 = new SourceField("src1", Collections.singleton("text"));
ExtractedField sourceField2 = new SourceField("src2", Collections.singleton("text"));
ExtractedFields extractedFields = new ExtractedFields(Arrays.asList(
docValue1, docValue2, scriptField1, scriptField2, sourceField1, sourceField2));
@ -74,16 +66,6 @@ public class ExtractedFieldsTests extends ESTestCase {
}
public void testBuildGivenMultiFields() {
Job.Builder jobBuilder = new Job.Builder("foo");
jobBuilder.setDataDescription(new DataDescription.Builder());
Detector.Builder detector = new Detector.Builder("count", null);
detector.setByFieldName("airline.text");
detector.setOverFieldName("airport.keyword");
jobBuilder.setAnalysisConfig(new AnalysisConfig.Builder(Collections.singletonList(detector.build())));
DatafeedConfig.Builder datafeedBuilder = new DatafeedConfig.Builder("feed", jobBuilder.getId());
datafeedBuilder.setIndices(Collections.singletonList("foo"));
Map<String, FieldCapabilities> text = new HashMap<>();
text.put("text", createFieldCaps(false));
Map<String, FieldCapabilities> keyword = new HashMap<>();
@ -103,10 +85,34 @@ public class ExtractedFieldsTests extends ESTestCase {
assertThat(extractedFields.getSourceFields()[0], equalTo("airline"));
assertThat(extractedFields.getAllFields().size(), equalTo(2));
assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airport.keyword")).findFirst().get().getAlias(),
equalTo("airport.keyword"));
assertThat(extractedFields.getAllFields().stream().filter(f -> f.getName().equals("airline")).findFirst().get().getAlias(),
equalTo("airline.text"));
ExtractedField airlineField = extractedFields.getAllFields().get(0);
assertThat(airlineField.isMultiField(), is(true));
assertThat(airlineField.getName(), equalTo("airline.text"));
assertThat(airlineField.getSearchField(), equalTo("airline"));
assertThat(airlineField.getParentField(), equalTo("airline"));
ExtractedField airportField = extractedFields.getAllFields().get(1);
assertThat(airportField.isMultiField(), is(true));
assertThat(airportField.getName(), equalTo("airport.keyword"));
assertThat(airportField.getSearchField(), equalTo("airport.keyword"));
assertThat(airportField.getParentField(), equalTo("airport"));
}
public void testApplyBooleanMapping() {
DocValueField aBool = new DocValueField("a_bool", Collections.singleton("boolean"));
ExtractedField mapped = ExtractedFields.applyBooleanMapping(aBool, 1, 0);
SearchHit hitTrue = new SearchHitBuilder(42).addField("a_bool", true).build();
SearchHit hitFalse = new SearchHitBuilder(42).addField("a_bool", false).build();
assertThat(mapped.value(hitTrue), equalTo(new Integer[] { 1 }));
assertThat(mapped.value(hitFalse), equalTo(new Integer[] { 0 }));
assertThat(mapped.getName(), equalTo(aBool.getName()));
assertThat(mapped.getMethod(), equalTo(aBool.getMethod()));
assertThat(mapped.supportsFromSource(), is(false));
expectThrows(UnsupportedOperationException.class, () -> mapped.newFromSource());
}
public void testBuildGivenFieldWithoutMappings() {

View File

@ -0,0 +1,39 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
public class GeoPointFieldTests extends ESTestCase {
public void testGivenGeoPoint() {
double lat = 38.897676;
double lon = -77.03653;
String[] expected = new String[] {lat + "," + lon};
SearchHit hit = new SearchHitBuilder(42).addField("geo", lat + ", " + lon).build();
// doc_value field
ExtractedField geo = new GeoPointField("geo");
assertThat(geo.value(hit), equalTo(expected));
assertThat(geo.getName(), equalTo("geo"));
assertThat(geo.getSearchField(), equalTo("geo"));
assertThat(geo.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
assertThat(geo.getTypes(), contains("geo_point"));
assertThat(geo.getDocValueFormat(), is(nullValue()));
assertThat(geo.supportsFromSource(), is(false));
expectThrows(UnsupportedOperationException.class, () -> geo.newFromSource());
assertThat(geo.isMultiField(), is(false));
expectThrows(UnsupportedOperationException.class, () -> geo.getParentField());
}
}

View File

@ -0,0 +1,62 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;
public class GeoShapeFieldTests extends ESTestCase {
public void testObjectFormat() {
double lat = 38.897676;
double lon = -77.03653;
String[] expected = new String[] {lat + "," + lon};
SearchHit hit = new SearchHitBuilder(42)
.setSource("{\"geo\":{\"type\":\"point\", \"coordinates\": [" + lon + ", " + lat + "]}}")
.build();
ExtractedField geo = new GeoShapeField("geo");
assertThat(geo.value(hit), equalTo(expected));
assertThat(geo.getName(), equalTo("geo"));
assertThat(geo.getSearchField(), equalTo("geo"));
assertThat(geo.getTypes(), contains("geo_shape"));
assertThat(geo.getMethod(), equalTo(ExtractedField.Method.SOURCE));
assertThat(geo.supportsFromSource(), is(true));
assertThat(geo.newFromSource(), sameInstance(geo));
expectThrows(UnsupportedOperationException.class, () -> geo.getDocValueFormat());
assertThat(geo.isMultiField(), is(false));
expectThrows(UnsupportedOperationException.class, () -> geo.getParentField());
}
public void testWKTFormat() {
double lat = 38.897676;
double lon = -77.03653;
String[] expected = new String[] {lat + "," + lon};
SearchHit hit = new SearchHitBuilder(42).setSource("{\"geo\":\"POINT ("+ lon + " " + lat + ")\"}").build();
ExtractedField geo = new GeoShapeField("geo");
assertThat(geo.value(hit), equalTo(expected));
assertThat(geo.getName(), equalTo("geo"));
assertThat(geo.getSearchField(), equalTo("geo"));
assertThat(geo.getTypes(), contains("geo_shape"));
assertThat(geo.getMethod(), equalTo(ExtractedField.Method.SOURCE));
assertThat(geo.supportsFromSource(), is(true));
assertThat(geo.newFromSource(), sameInstance(geo));
expectThrows(UnsupportedOperationException.class, () -> geo.getDocValueFormat());
assertThat(geo.isMultiField(), is(false));
expectThrows(UnsupportedOperationException.class, () -> geo.getParentField());
}
}

View File

@ -0,0 +1,50 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import java.util.Collections;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
public class MultiFieldTests extends ESTestCase {
public void testGivenSameSearchField() {
SearchHit hit = new SearchHitBuilder(42).addField("a", 1).addField("a.b", 2).build();
ExtractedField wrapped = new DocValueField("a.b", Collections.singleton("integer"));
ExtractedField field = new MultiField("a", wrapped);
assertThat(field.value(hit), equalTo(new Integer[] { 2 }));
assertThat(field.getName(), equalTo("a.b"));
assertThat(field.getSearchField(), equalTo("a.b"));
assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
assertThat(field.isMultiField(), is(true));
assertThat(field.getParentField(), equalTo("a"));
assertThat(field.supportsFromSource(), is(false));
expectThrows(UnsupportedOperationException.class, () -> field.newFromSource());
}
public void testGivenDifferentSearchField() {
SearchHit hit = new SearchHitBuilder(42).addField("a", 1).addField("a.b", 2).build();
ExtractedField wrapped = new DocValueField("a", Collections.singleton("integer"));
ExtractedField field = new MultiField("a.b", "a", "a", wrapped);
assertThat(field.value(hit), equalTo(new Integer[] { 1 }));
assertThat(field.getName(), equalTo("a.b"));
assertThat(field.getSearchField(), equalTo("a"));
assertThat(field.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
assertThat(field.isMultiField(), is(true));
assertThat(field.getParentField(), equalTo("a"));
assertThat(field.supportsFromSource(), is(false));
expectThrows(UnsupportedOperationException.class, () -> field.newFromSource());
}
}

View File

@ -0,0 +1,64 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import java.util.Arrays;
import java.util.Collections;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
public class ScriptFieldTests extends ESTestCase {
public void testKeyword() {
SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build();
ExtractedField field = new ScriptField("a_keyword");
assertThat(field.value(hit), equalTo(new String[] { "bar" }));
assertThat(field.getName(), equalTo("a_keyword"));
assertThat(field.getSearchField(), equalTo("a_keyword"));
assertThat(field.getTypes().isEmpty(), is(true));
expectThrows(UnsupportedOperationException.class, () -> field.getDocValueFormat());
assertThat(field.getMethod(), equalTo(ExtractedField.Method.SCRIPT_FIELD));
expectThrows(UnsupportedOperationException.class, () -> field.getParentField());
assertThat(field.isMultiField(), is(false));
assertThat(field.supportsFromSource(), is(false));
expectThrows(UnsupportedOperationException.class, () -> field.newFromSource());
}
public void testKeywordArray() {
SearchHit hit = new SearchHitBuilder(42).addField("array", Arrays.asList("a", "b")).build();
ExtractedField field = new ScriptField("array");
assertThat(field.value(hit), equalTo(new String[] { "a", "b" }));
assertThat(field.getName(), equalTo("array"));
assertThat(field.getSearchField(), equalTo("array"));
assertThat(field.getTypes().isEmpty(), is(true));
expectThrows(UnsupportedOperationException.class, () -> field.getDocValueFormat());
assertThat(field.getMethod(), equalTo(ExtractedField.Method.SCRIPT_FIELD));
expectThrows(UnsupportedOperationException.class, () -> field.getParentField());
assertThat(field.isMultiField(), is(false));
assertThat(field.supportsFromSource(), is(false));
expectThrows(UnsupportedOperationException.class, () -> field.newFromSource());
ExtractedField missing = new DocValueField("missing", Collections.singleton("keyword"));
assertThat(missing.value(hit), equalTo(new Object[0]));
}
public void testMissing() {
SearchHit hit = new SearchHitBuilder(42).addField("a_keyword", "bar").build();
ExtractedField missing = new ScriptField("missing");
assertThat(missing.value(hit), equalTo(new Object[0]));
}
}

View File

@ -0,0 +1,70 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import java.util.Collections;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.sameInstance;
public class SourceFieldTests extends ESTestCase {
public void testSingleValue() {
SearchHit hit = new SearchHitBuilder(42).setSource("{\"single\":\"bar\"}").build();
ExtractedField field = new SourceField("single", Collections.singleton("text"));
assertThat(field.value(hit), equalTo(new String[] { "bar" }));
assertThat(field.getName(), equalTo("single"));
assertThat(field.getSearchField(), equalTo("single"));
assertThat(field.getTypes(), contains("text"));
assertThat(field.getMethod(), equalTo(ExtractedField.Method.SOURCE));
assertThat(field.supportsFromSource(), is(true));
assertThat(field.newFromSource(), sameInstance(field));
assertThat(field.isMultiField(), is(false));
expectThrows(UnsupportedOperationException.class, () -> field.getParentField());
expectThrows(UnsupportedOperationException.class, () -> field.getDocValueFormat());
}
public void testArray() {
SearchHit hit = new SearchHitBuilder(42).setSource("{\"array\":[\"a\",\"b\"]}").build();
ExtractedField field = new SourceField("array", Collections.singleton("text"));
assertThat(field.value(hit), equalTo(new String[] { "a", "b" }));
assertThat(field.getName(), equalTo("array"));
assertThat(field.getSearchField(), equalTo("array"));
assertThat(field.getTypes(), contains("text"));
assertThat(field.getMethod(), equalTo(ExtractedField.Method.SOURCE));
assertThat(field.supportsFromSource(), is(true));
assertThat(field.newFromSource(), sameInstance(field));
assertThat(field.isMultiField(), is(false));
expectThrows(UnsupportedOperationException.class, () -> field.getParentField());
expectThrows(UnsupportedOperationException.class, () -> field.getDocValueFormat());
}
public void testMissing() {
SearchHit hit = new SearchHitBuilder(42).setSource("{\"array\":[\"a\",\"b\"]}").build();
ExtractedField missing = new SourceField("missing", Collections.singleton("text"));
assertThat(missing.value(hit), equalTo(new Object[0]));
}
public void testValueGivenNested() {
SearchHit hit = new SearchHitBuilder(42).setSource("{\"level_1\":{\"level_2\":{\"foo\":\"bar\"}}}").build();
ExtractedField nested = new SourceField("level_1.level_2.foo", Collections.singleton("text"));
assertThat(nested.value(hit), equalTo(new String[] { "bar" }));
}
}

View File

@ -0,0 +1,67 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.extractor;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.startsWith;
public class TimeFieldTests extends ESTestCase {
public void testDocValueWithStringValue() {
long millis = randomLong();
SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", Long.toString(millis)).build();
ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE);
assertThat(timeField.value(hit), equalTo(new Object[] { millis }));
assertThat(timeField.getName(), equalTo("time"));
assertThat(timeField.getSearchField(), equalTo("time"));
assertThat(timeField.getTypes(), contains("date"));
assertThat(timeField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
assertThat(timeField.getDocValueFormat(), equalTo("epoch_millis"));
assertThat(timeField.supportsFromSource(), is(false));
expectThrows(UnsupportedOperationException.class, () -> timeField.newFromSource());
assertThat(timeField.isMultiField(), is(false));
expectThrows(UnsupportedOperationException.class, () -> timeField.getParentField());
}
public void testScriptWithLongValue() {
long millis = randomLong();
SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", millis).build();
ExtractedField timeField = new TimeField("time", ExtractedField.Method.SCRIPT_FIELD);
assertThat(timeField.value(hit), equalTo(new Object[] { millis }));
assertThat(timeField.getName(), equalTo("time"));
assertThat(timeField.getSearchField(), equalTo("time"));
assertThat(timeField.getTypes(), contains("date"));
assertThat(timeField.getMethod(), equalTo(ExtractedField.Method.SCRIPT_FIELD));
expectThrows(UnsupportedOperationException.class, () -> timeField.getDocValueFormat());
assertThat(timeField.supportsFromSource(), is(false));
expectThrows(UnsupportedOperationException.class, () -> timeField.newFromSource());
assertThat(timeField.isMultiField(), is(false));
expectThrows(UnsupportedOperationException.class, () -> timeField.getParentField());
}
public void testUnknownFormat() {
final SearchHit hit = new SearchHitBuilder(randomInt()).addField("time", new Object()).build();
final ExtractedField timeField = new TimeField("time", ExtractedField.Method.DOC_VALUE);
assertThat(expectThrows(IllegalStateException.class, () -> timeField.value(hit)).getMessage(),
startsWith("Unexpected value for a time field"));
}
public void testSourceNotSupported() {
expectThrows(IllegalArgumentException.class, () -> new TimeField("foo", ExtractedField.Method.SOURCE));
}
}