[7.x] Handle nested and aliased fields correctly when copying mapping. (#50918) (#50968)

This commit is contained in:
Przemysław Witek 2020-01-14 14:43:39 +01:00 committed by GitHub
parent f028ab08d1
commit 9c6ffdc2be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 191 additions and 44 deletions

View File

@ -63,6 +63,9 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
private static final String NUMERICAL_FIELD = "numerical-field";
private static final String DISCRETE_NUMERICAL_FIELD = "discrete-numerical-field";
private static final String KEYWORD_FIELD = "keyword-field";
private static final String NESTED_FIELD = "outer-field.inner-field";
private static final String ALIAS_TO_KEYWORD_FIELD = "alias-to-keyword-field";
private static final String ALIAS_TO_NESTED_FIELD = "alias-to-nested-field";
private static final List<Boolean> BOOLEAN_FIELD_VALUES = Collections.unmodifiableList(Arrays.asList(false, true));
private static final List<Double> NUMERICAL_FIELD_VALUES = Collections.unmodifiableList(Arrays.asList(1.0, 2.0));
private static final List<Integer> DISCRETE_NUMERICAL_FIELD_VALUES = Collections.unmodifiableList(Arrays.asList(10, 20));
@ -301,7 +304,6 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
assertInferenceModelPersisted(jobId);
assertMlResultsFieldMappings(predictedClassField, "keyword");
assertEvaluation(KEYWORD_FIELD, KEYWORD_FIELD_VALUES, "ml." + predictedClassField);
}
public void testDependentVariableCardinalityTooHighError() throws Exception {
@ -342,6 +344,63 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
assertProgress(jobId, 100, 100, 100, 100);
}
public void testDependentVariableIsNested() throws Exception {
initialize("dependent_variable_is_nested");
String predictedClassField = NESTED_FIELD + "_prediction";
indexData(sourceIndex, 100, 0, NESTED_FIELD);
DataFrameAnalyticsConfig config = buildAnalytics(jobId, sourceIndex, destIndex, null, new Classification(NESTED_FIELD));
registerAnalytics(config);
putAnalytics(config);
startAnalytics(jobId);
waitUntilAnalyticsIsStopped(jobId);
assertProgress(jobId, 100, 100, 100, 100);
assertThat(searchStoredProgress(jobId).getHits().getTotalHits().value, equalTo(1L));
assertModelStatePersisted(stateDocId());
assertInferenceModelPersisted(jobId);
assertMlResultsFieldMappings(predictedClassField, "keyword");
assertEvaluation(NESTED_FIELD, KEYWORD_FIELD_VALUES, "ml." + predictedClassField);
}
public void testDependentVariableIsAliasToKeyword() throws Exception {
initialize("dependent_variable_is_alias");
String predictedClassField = ALIAS_TO_KEYWORD_FIELD + "_prediction";
indexData(sourceIndex, 100, 0, KEYWORD_FIELD);
DataFrameAnalyticsConfig config = buildAnalytics(jobId, sourceIndex, destIndex, null, new Classification(ALIAS_TO_KEYWORD_FIELD));
registerAnalytics(config);
putAnalytics(config);
startAnalytics(jobId);
waitUntilAnalyticsIsStopped(jobId);
assertProgress(jobId, 100, 100, 100, 100);
assertThat(searchStoredProgress(jobId).getHits().getTotalHits().value, equalTo(1L));
assertModelStatePersisted(stateDocId());
assertInferenceModelPersisted(jobId);
assertMlResultsFieldMappings(predictedClassField, "keyword");
assertEvaluation(ALIAS_TO_KEYWORD_FIELD, KEYWORD_FIELD_VALUES, "ml." + predictedClassField);
}
public void testDependentVariableIsAliasToNested() throws Exception {
initialize("dependent_variable_is_alias_to_nested");
String predictedClassField = ALIAS_TO_NESTED_FIELD + "_prediction";
indexData(sourceIndex, 100, 0, NESTED_FIELD);
DataFrameAnalyticsConfig config = buildAnalytics(jobId, sourceIndex, destIndex, null, new Classification(ALIAS_TO_NESTED_FIELD));
registerAnalytics(config);
putAnalytics(config);
startAnalytics(jobId);
waitUntilAnalyticsIsStopped(jobId);
assertProgress(jobId, 100, 100, 100, 100);
assertThat(searchStoredProgress(jobId).getHits().getTotalHits().value, equalTo(1L));
assertModelStatePersisted(stateDocId());
assertInferenceModelPersisted(jobId);
assertMlResultsFieldMappings(predictedClassField, "keyword");
assertEvaluation(ALIAS_TO_NESTED_FIELD, KEYWORD_FIELD_VALUES, "ml." + predictedClassField);
}
public void testTwoJobsWithSameRandomizeSeedUseSameTrainingSet() throws Exception {
String sourceIndex = "classification_two_jobs_with_same_randomize_seed_source";
String dependentVariable = KEYWORD_FIELD;
@ -433,7 +492,10 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
BOOLEAN_FIELD, "type=boolean",
NUMERICAL_FIELD, "type=double",
DISCRETE_NUMERICAL_FIELD, "type=integer",
KEYWORD_FIELD, "type=keyword")
KEYWORD_FIELD, "type=keyword",
NESTED_FIELD, "type=keyword",
ALIAS_TO_KEYWORD_FIELD, "type=alias,path=" + KEYWORD_FIELD,
ALIAS_TO_NESTED_FIELD, "type=alias,path=" + NESTED_FIELD)
.get();
}
@ -445,7 +507,8 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
BOOLEAN_FIELD, BOOLEAN_FIELD_VALUES.get(i % BOOLEAN_FIELD_VALUES.size()),
NUMERICAL_FIELD, NUMERICAL_FIELD_VALUES.get(i % NUMERICAL_FIELD_VALUES.size()),
DISCRETE_NUMERICAL_FIELD, DISCRETE_NUMERICAL_FIELD_VALUES.get(i % DISCRETE_NUMERICAL_FIELD_VALUES.size()),
KEYWORD_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size()));
KEYWORD_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size()),
NESTED_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size()));
IndexRequest indexRequest = new IndexRequest(sourceIndex).source(source.toArray());
bulkRequestBuilder.add(indexRequest);
}
@ -465,6 +528,9 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
if (KEYWORD_FIELD.equals(dependentVariable) == false) {
source.addAll(Arrays.asList(KEYWORD_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size())));
}
if (NESTED_FIELD.equals(dependentVariable) == false) {
source.addAll(Arrays.asList(NESTED_FIELD, KEYWORD_FIELD_VALUES.get(i % KEYWORD_FIELD_VALUES.size())));
}
IndexRequest indexRequest = new IndexRequest(sourceIndex).source(source.toArray());
bulkRequestBuilder.add(indexRequest);
}
@ -487,10 +553,12 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
}
/**
* Wrapper around extractValue with implicit casting to the appropriate type.
* Wrapper around extractValue that:
* - allows dots (".") in the path elements provided as arguments
* - supports implicit casting to the appropriate type
*/
private static <T> T getFieldValue(Map<String, Object> doc, String... path) {
return (T)extractValue(doc, path);
return (T)extractValue(String.join(".", path), doc);
}
private static <T> void assertTopClasses(Map<String, Object> resultsObject,
@ -583,8 +651,14 @@ public class ClassificationIT extends MlNativeDataFrameAnalyticsIntegTestCase {
.get(destIndex)
.get("_doc")
.sourceAsMap();
assertThat(getFieldValue(mappings, "properties", "ml", "properties", predictedClassField, "type"), equalTo(expectedType));
assertThat(
mappings.toString(),
getFieldValue(
mappings,
"properties", "ml", "properties", String.join(".properties.", predictedClassField.split("\\.")), "type"),
equalTo(expectedType));
assertThat(
mappings.toString(),
getFieldValue(mappings, "properties", "ml", "properties", "top_classes", "properties", "class_name", "type"),
equalTo(expectedType));
}

View File

@ -25,6 +25,8 @@ import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSortConfig;
import org.elasticsearch.index.mapper.FieldAliasMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.xpack.core.ClientHelper;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
@ -39,6 +41,7 @@ import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;
import static org.elasticsearch.common.xcontent.support.XContentMapValues.extractValue;
import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
/**
@ -160,23 +163,38 @@ public final class DataFrameAnalyticsIndex {
return maxValue;
}
@SuppressWarnings("unchecked")
private static Map<String, Object> createAdditionalMappings(DataFrameAnalyticsConfig config, Map<String, Object> mappingsProperties) {
Map<String, Object> properties = new HashMap<>();
Map<String, String> idCopyMapping = new HashMap<>();
idCopyMapping.put("type", "keyword");
idCopyMapping.put("type", KeywordFieldMapper.CONTENT_TYPE);
properties.put(ID_COPY, idCopyMapping);
for (Map.Entry<String, String> entry
: config.getAnalysis().getExplicitlyMappedFields(config.getDest().getResultsField()).entrySet()) {
String destFieldPath = entry.getKey();
String sourceFieldPath = entry.getValue();
Object sourceFieldMapping = mappingsProperties.get(sourceFieldPath);
if (sourceFieldMapping != null) {
Object sourceFieldMapping = extractMapping(sourceFieldPath, mappingsProperties);
if (sourceFieldMapping instanceof Map) {
Map<String, Object> sourceFieldMappingAsMap = (Map) sourceFieldMapping;
// If the source field is an alias, fetch the concrete field that the alias points to.
if (FieldAliasMapper.CONTENT_TYPE.equals(sourceFieldMappingAsMap.get("type"))) {
String path = (String) sourceFieldMappingAsMap.get(FieldAliasMapper.Names.PATH);
sourceFieldMapping = extractMapping(path, mappingsProperties);
}
}
// We may have updated the value of {@code sourceFieldMapping} in the "if" block above.
// Hence, we need to check the "instanceof" condition again.
if (sourceFieldMapping instanceof Map) {
properties.put(destFieldPath, sourceFieldMapping);
}
}
return properties;
}
private static Object extractMapping(String path, Map<String, Object> mappingsProperties) {
return extractValue(String.join("." + PROPERTIES + ".", path.split("\\.")), mappingsProperties);
}
private static Map<String, Object> createMetaData(String analyticsId, Clock clock) {
Map<String, Object> metadata = new HashMap<>();
metadata.put(CREATION_DATE_MILLIS, clock.millis());
@ -239,4 +257,3 @@ public final class DataFrameAnalyticsIndex {
}
}
}

View File

@ -73,7 +73,11 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
private static final String ANALYTICS_ID = "some-analytics-id";
private static final String[] SOURCE_INDEX = new String[] {"source-index"};
private static final String DEST_INDEX = "dest-index";
private static final String DEPENDENT_VARIABLE = "dep_var";
private static final String NUMERICAL_FIELD = "numerical-field";
private static final String OUTER_FIELD = "outer-field";
private static final String INNER_FIELD = "inner-field";
private static final String ALIAS_TO_NUMERICAL_FIELD = "alias-to-numerical-field";
private static final String ALIAS_TO_NESTED_FIELD = "alias-to-nested-field";
private static final int CURRENT_TIME_MILLIS = 123456789;
private static final String CREATED_BY = "data-frame-analytics";
@ -118,19 +122,24 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
doAnswer(callListenerOnResponse(getSettingsResponse))
.when(client).execute(eq(GetSettingsAction.INSTANCE), getSettingsRequestCaptor.capture(), any());
Map<String, Object> index1Properties = new HashMap<>();
index1Properties.put("field_1", "field_1_mappings");
index1Properties.put("field_2", "field_2_mappings");
index1Properties.put(DEPENDENT_VARIABLE, Collections.singletonMap("type", "integer"));
Map<String, Object> index1Mappings = Collections.singletonMap("properties", index1Properties);
MappingMetaData index1MappingMetaData = new MappingMetaData("_doc", index1Mappings);
Map<String, Object> index2Properties = new HashMap<>();
index2Properties.put("field_1", "field_1_mappings");
index2Properties.put("field_2", "field_2_mappings");
index2Properties.put(DEPENDENT_VARIABLE, Collections.singletonMap("type", "integer"));
Map<String, Object> index2Mappings = Collections.singletonMap("properties", index2Properties);
MappingMetaData index2MappingMetaData = new MappingMetaData("_doc", index2Mappings);
Map<String, Object> indexProperties = new HashMap<>();
indexProperties.put("field_1", "field_1_mappings");
indexProperties.put("field_2", "field_2_mappings");
indexProperties.put(NUMERICAL_FIELD, Collections.singletonMap("type", "integer"));
indexProperties.put(
OUTER_FIELD,
Collections.singletonMap("properties", Collections.singletonMap(INNER_FIELD, Collections.singletonMap("type", "integer"))));
Map<String, Object> aliasToNumericalFieldProperties = new HashMap<>();
aliasToNumericalFieldProperties.put("type", "alias");
aliasToNumericalFieldProperties.put("path", NUMERICAL_FIELD);
indexProperties.put(ALIAS_TO_NUMERICAL_FIELD, aliasToNumericalFieldProperties);
Map<String, Object> aliasToNestedFieldProperties = new HashMap<>();
aliasToNestedFieldProperties.put("type", "alias");
aliasToNestedFieldProperties.put("path", "outer-field.inner-field");
indexProperties.put(ALIAS_TO_NESTED_FIELD, aliasToNestedFieldProperties);
Map<String, Object> indexMappings = Collections.singletonMap("properties", indexProperties);
MappingMetaData index1MappingMetaData = new MappingMetaData("_doc", indexMappings);
MappingMetaData index2MappingMetaData = new MappingMetaData("_doc", indexMappings);
ImmutableOpenMap.Builder<String, MappingMetaData> index1MappingsMap = ImmutableOpenMap.builder();
index1MappingsMap.put("_doc", index1MappingMetaData);
@ -152,7 +161,9 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
config,
ActionListener.wrap(
response -> {},
e -> fail(e.getMessage())));
e -> fail(e.getMessage())
)
);
GetSettingsRequest capturedGetSettingsRequest = getSettingsRequestCaptor.getValue();
assertThat(capturedGetSettingsRequest.indices(), equalTo(SOURCE_INDEX));
@ -175,6 +186,10 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
assertThat(extractValue("_doc.properties.ml__id_copy.type", map), equalTo("keyword"));
assertThat(extractValue("_doc.properties.field_1", map), equalTo("field_1_mappings"));
assertThat(extractValue("_doc.properties.field_2", map), equalTo("field_2_mappings"));
assertThat(extractValue("_doc.properties.numerical-field.type", map), equalTo("integer"));
assertThat(extractValue("_doc.properties.outer-field.properties.inner-field.type", map), equalTo("integer"));
assertThat(extractValue("_doc.properties.alias-to-numerical-field.type", map), equalTo("alias"));
assertThat(extractValue("_doc.properties.alias-to-nested-field.type", map), equalTo("alias"));
assertThat(extractValue("_doc._meta.analytics", map), equalTo(ANALYTICS_ID));
assertThat(extractValue("_doc._meta.creation_date_in_millis", map), equalTo(CURRENT_TIME_MILLIS));
assertThat(extractValue("_doc._meta.created_by", map), equalTo(CREATED_BY));
@ -187,13 +202,31 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
}
public void testCreateDestinationIndex_Regression() throws IOException {
Map<String, Object> map = testCreateDestinationIndex(new Regression(DEPENDENT_VARIABLE));
assertThat(extractValue("_doc.properties.ml.dep_var_prediction.type", map), equalTo("integer"));
Map<String, Object> map = testCreateDestinationIndex(new Regression(NUMERICAL_FIELD));
assertThat(extractValue("_doc.properties.ml.numerical-field_prediction.type", map), equalTo("integer"));
}
public void testCreateDestinationIndex_Classification() throws IOException {
Map<String, Object> map = testCreateDestinationIndex(new Classification(DEPENDENT_VARIABLE));
assertThat(extractValue("_doc.properties.ml.dep_var_prediction.type", map), equalTo("integer"));
Map<String, Object> map = testCreateDestinationIndex(new Classification(NUMERICAL_FIELD));
assertThat(extractValue("_doc.properties.ml.numerical-field_prediction.type", map), equalTo("integer"));
assertThat(extractValue("_doc.properties.ml.top_classes.class_name.type", map), equalTo("integer"));
}
public void testCreateDestinationIndex_Classification_DependentVariableIsNested() throws IOException {
Map<String, Object> map = testCreateDestinationIndex(new Classification(OUTER_FIELD + "." + INNER_FIELD));
assertThat(extractValue("_doc.properties.ml.outer-field.inner-field_prediction.type", map), equalTo("integer"));
assertThat(extractValue("_doc.properties.ml.top_classes.class_name.type", map), equalTo("integer"));
}
public void testCreateDestinationIndex_Classification_DependentVariableIsAlias() throws IOException {
Map<String, Object> map = testCreateDestinationIndex(new Classification(ALIAS_TO_NUMERICAL_FIELD));
assertThat(extractValue("_doc.properties.ml.alias-to-numerical-field_prediction.type", map), equalTo("integer"));
assertThat(extractValue("_doc.properties.ml.top_classes.class_name.type", map), equalTo("integer"));
}
public void testCreateDestinationIndex_Classification_DependentVariableIsAliasToNested() throws IOException {
Map<String, Object> map = testCreateDestinationIndex(new Classification(ALIAS_TO_NESTED_FIELD));
assertThat(extractValue("_doc.properties.ml.alias-to-nested-field_prediction.type", map), equalTo("integer"));
assertThat(extractValue("_doc.properties.ml.top_classes.class_name.type", map), equalTo("integer"));
}
@ -226,10 +259,23 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
);
}
private Map<String, Object> testUpdateMappingsToDestIndex(DataFrameAnalysis analysis,
Map<String, Object> properties) throws IOException {
private Map<String, Object> testUpdateMappingsToDestIndex(DataFrameAnalysis analysis) throws IOException {
DataFrameAnalyticsConfig config = createConfig(analysis);
Map<String, Object> properties = new HashMap<>();
properties.put(NUMERICAL_FIELD, Collections.singletonMap("type", "integer"));
properties.put(
OUTER_FIELD,
Collections.singletonMap("properties", Collections.singletonMap(INNER_FIELD, Collections.singletonMap("type", "integer"))));
Map<String, Object> aliasToNumericalFieldProperties = new HashMap<>();
aliasToNumericalFieldProperties.put("type", "alias");
aliasToNumericalFieldProperties.put("path", NUMERICAL_FIELD);
properties.put(ALIAS_TO_NUMERICAL_FIELD, aliasToNumericalFieldProperties);
Map<String, Object> aliasToNestedFieldProperties = new HashMap<>();
aliasToNestedFieldProperties.put("type", "alias");
aliasToNestedFieldProperties.put("path", "outer-field.inner-field");
properties.put(ALIAS_TO_NESTED_FIELD, aliasToNestedFieldProperties);
MappingMetaData indexMappingMetaData = new MappingMetaData("_doc", Collections.singletonMap("properties", properties));
ImmutableOpenMap.Builder<String, MappingMetaData> indexMappingsMap = ImmutableOpenMap.builder();
indexMappingsMap.put("_doc", indexMappingMetaData);
@ -268,25 +314,35 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
}
public void testUpdateMappingsToDestIndex_OutlierDetection() throws IOException {
testUpdateMappingsToDestIndex(
new OutlierDetection.Builder().build(),
Collections.singletonMap(DEPENDENT_VARIABLE, Collections.singletonMap("type", "integer")));
testUpdateMappingsToDestIndex(new OutlierDetection.Builder().build());
}
public void testUpdateMappingsToDestIndex_Regression() throws IOException {
Map<String, Object> map =
testUpdateMappingsToDestIndex(
new Regression(DEPENDENT_VARIABLE),
Collections.singletonMap(DEPENDENT_VARIABLE, Collections.singletonMap("type", "integer")));
assertThat(extractValue("properties.ml.dep_var_prediction.type", map), equalTo("integer"));
Map<String, Object> map = testUpdateMappingsToDestIndex(new Regression(NUMERICAL_FIELD));
assertThat(extractValue("properties.ml.numerical-field_prediction.type", map), equalTo("integer"));
}
public void testUpdateMappingsToDestIndex_Classification() throws IOException {
Map<String, Object> map =
testUpdateMappingsToDestIndex(
new Classification(DEPENDENT_VARIABLE),
Collections.singletonMap(DEPENDENT_VARIABLE, Collections.singletonMap("type", "integer")));
assertThat(extractValue("properties.ml.dep_var_prediction.type", map), equalTo("integer"));
Map<String, Object> map = testUpdateMappingsToDestIndex(new Classification(NUMERICAL_FIELD));
assertThat(extractValue("properties.ml.numerical-field_prediction.type", map), equalTo("integer"));
assertThat(extractValue("properties.ml.top_classes.class_name.type", map), equalTo("integer"));
}
public void testUpdateMappingsToDestIndex_Classification_DependentVariableIsNested() throws IOException {
Map<String, Object> map = testUpdateMappingsToDestIndex(new Classification(OUTER_FIELD + "." + INNER_FIELD));
assertThat(extractValue("properties.ml.outer-field.inner-field_prediction.type", map), equalTo("integer"));
assertThat(extractValue("properties.ml.top_classes.class_name.type", map), equalTo("integer"));
}
public void testUpdateMappingsToDestIndex_Classification_DependentVariableIsAlias() throws IOException {
Map<String, Object> map = testUpdateMappingsToDestIndex(new Classification(ALIAS_TO_NUMERICAL_FIELD));
assertThat(extractValue("properties.ml.alias-to-numerical-field_prediction.type", map), equalTo("integer"));
assertThat(extractValue("properties.ml.top_classes.class_name.type", map), equalTo("integer"));
}
public void testUpdateMappingsToDestIndex_Classification_DependentVariableIsAliasToNested() throws IOException {
Map<String, Object> map = testUpdateMappingsToDestIndex(new Classification(ALIAS_TO_NESTED_FIELD));
assertThat(extractValue("properties.ml.alias-to-nested-field_prediction.type", map), equalTo("integer"));
assertThat(extractValue("properties.ml.top_classes.class_name.type", map), equalTo("integer"));
}