[ML] Support the unsigned_long type in data frame analytics (#64072)
Adds support for the unsigned_long type to data frame analytics. This type is handled in the same way as the long type. Values sent to the ML native processes are converted to floats, and hence will lose accuracy when they fall outside the range in which a float can represent each long value exactly. Backport of #64066.
This commit is contained in:
parent
96407268a0
commit
adc5509eda
|
@ -30,13 +30,14 @@ public final class Types {
|
|||
|
||||
private static final Set<String> NUMERICAL_TYPES =
|
||||
Collections.unmodifiableSet(
|
||||
Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float"))
|
||||
Stream.concat(Stream.of(NumberType.values()).map(NumberType::typeName), Stream.of("scaled_float", "unsigned_long"))
|
||||
.collect(Collectors.toSet()));
|
||||
|
||||
private static final Set<String> DISCRETE_NUMERICAL_TYPES =
|
||||
Collections.unmodifiableSet(
|
||||
Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG)
|
||||
.map(NumberType::typeName)
|
||||
Stream.concat(
|
||||
Stream.of(NumberType.BYTE, NumberType.SHORT, NumberType.INTEGER, NumberType.LONG).map(NumberType::typeName),
|
||||
Stream.of("unsigned_long"))
|
||||
.collect(Collectors.toSet()));
|
||||
|
||||
private static final Set<String> BOOL_TYPES = Collections.singleton(BooleanFieldMapper.CONTENT_TYPE);
|
||||
|
|
|
@ -168,7 +168,7 @@ public class DataFrameAnalysisCustomFeatureIT extends MlNativeDataFrameAnalytics
|
|||
" \"type\": \"double\"\n" +
|
||||
" }," +
|
||||
" \""+ DISCRETE_NUMERICAL_FIELD + "\": {\n" +
|
||||
" \"type\": \"integer\"\n" +
|
||||
" \"type\": \"unsigned_long\"\n" +
|
||||
" }," +
|
||||
" \""+ TEXT_FIELD + "\": {\n" +
|
||||
" \"type\": \"text\"\n" +
|
||||
|
|
|
@ -52,7 +52,7 @@ public class ExplainDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsInteg
|
|||
client().admin().indices().prepareCreate(sourceIndex)
|
||||
.addMapping("_doc",
|
||||
"numeric_1", "type=double",
|
||||
"numeric_2", "type=float",
|
||||
"numeric_2", "type=unsigned_long",
|
||||
"categorical", "type=keyword",
|
||||
"filtered_field", "type=keyword")
|
||||
.get();
|
||||
|
@ -64,7 +64,7 @@ public class ExplainDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsInteg
|
|||
IndexRequest indexRequest = new IndexRequest(sourceIndex);
|
||||
indexRequest.source(
|
||||
"numeric_1", 1.0,
|
||||
"numeric_2", 2.0,
|
||||
"numeric_2", 2,
|
||||
"categorical", i % 2 == 0 ? "class_1" : "class_2",
|
||||
"filtered_field", i < 2 ? "bingo" : "rest"); // We tag bingo on the first two docs to ensure we have 2 classes
|
||||
bulkRequestBuilder.add(indexRequest);
|
||||
|
|
|
@ -675,7 +675,7 @@ public class RegressionIT extends MlNativeDataFrameAnalyticsIntegTestCase {
|
|||
" \"type\": \"double\"\n" +
|
||||
" }," +
|
||||
" \"" + DISCRETE_NUMERICAL_FEATURE_FIELD + "\": {\n" +
|
||||
" \"type\": \"long\"\n" +
|
||||
" \"type\": \"unsigned_long\"\n" +
|
||||
" }," +
|
||||
" \"" + DEPENDENT_VARIABLE_FIELD + "\": {\n" +
|
||||
" \"type\": \"double\"\n" +
|
||||
|
|
|
@ -72,7 +72,7 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
String sourceIndex = "test-outlier-detection-with-few-docs";
|
||||
|
||||
client().admin().indices().prepareCreate(sourceIndex)
|
||||
.addMapping("_doc", "numeric_1", "type=double", "numeric_2", "type=float", "categorical_1", "type=keyword")
|
||||
.addMapping("_doc", "numeric_1", "type=double", "numeric_2", "type=unsigned_long", "categorical_1", "type=keyword")
|
||||
.get();
|
||||
|
||||
BulkRequestBuilder bulkRequestBuilder = client().prepareBulk();
|
||||
|
@ -84,7 +84,7 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
// We insert one odd value out of 5 for one feature
|
||||
String docId = i == 0 ? "outlier" : "normal" + i;
|
||||
indexRequest.id(docId);
|
||||
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1.0, "categorical_1", "foo_" + i);
|
||||
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1, "categorical_1", "foo_" + i);
|
||||
bulkRequestBuilder.add(indexRequest);
|
||||
}
|
||||
BulkResponse bulkResponse = bulkRequestBuilder.get();
|
||||
|
|
|
@ -105,7 +105,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
assertThat(fieldExtraction.v2().get(0).getName(), equalTo("some_keyword"));
|
||||
assertThat(fieldExtraction.v2().get(0).isIncluded(), is(false));
|
||||
assertThat(fieldExtraction.v2().get(0).getReason(), equalTo("unsupported type; supported types are " +
|
||||
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short]"));
|
||||
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"));
|
||||
}
|
||||
|
||||
public void testDetect_GivenOutlierDetectionAndFieldWithNumericAndNonNumericTypes() {
|
||||
|
@ -121,7 +121,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
assertThat(fieldExtraction.v2().get(0).getName(), equalTo("indecisive_field"));
|
||||
assertThat(fieldExtraction.v2().get(0).isIncluded(), is(false));
|
||||
assertThat(fieldExtraction.v2().get(0).getReason(), equalTo("unsupported type; supported types are " +
|
||||
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short]"));
|
||||
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"));
|
||||
}
|
||||
|
||||
public void testDetect_GivenOutlierDetectionAndMultipleFields() {
|
||||
|
@ -147,7 +147,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL),
|
||||
FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
|
||||
FieldSelection.excluded("some_keyword", Collections.singleton("keyword"), "unsupported type; " +
|
||||
"supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
|
||||
"supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"),
|
||||
FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL)
|
||||
);
|
||||
}
|
||||
|
@ -282,7 +282,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
|
||||
|
||||
assertThat(e.getMessage(), equalTo("invalid types [keyword] for required field [foo]; " +
|
||||
"expected types are [byte, double, float, half_float, integer, long, scaled_float, short]"));
|
||||
"expected types are [byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"));
|
||||
}
|
||||
|
||||
public void testDetect_GivenClassificationAndRequiredFieldHasInvalidType() {
|
||||
|
@ -298,7 +298,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
|
||||
|
||||
assertThat(e.getMessage(), equalTo("invalid types [float] for required field [some_float]; " +
|
||||
"expected types are [boolean, byte, integer, ip, keyword, long, short, text]"));
|
||||
"expected types are [boolean, byte, integer, ip, keyword, long, short, text, unsigned_long]"));
|
||||
}
|
||||
|
||||
public void testDetect_GivenClassificationAndDependentVariableHasInvalidCardinality() {
|
||||
|
@ -371,7 +371,8 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.excluded("categorical", Collections.singleton("keyword"),
|
||||
"unsupported type; supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
|
||||
"unsupported type; supported types are " +
|
||||
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"),
|
||||
FieldSelection.included("numeric", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
|
||||
);
|
||||
}
|
||||
|
@ -471,7 +472,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
|
||||
|
||||
assertThat(e.getMessage(), equalTo("field [your_keyword] has unsupported type [keyword]. " +
|
||||
"Supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]."));
|
||||
"Supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]."));
|
||||
}
|
||||
|
||||
public void testDetect_GivenNotIncludedFieldHasUnsupportedType() {
|
||||
|
@ -492,7 +493,8 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.excluded("categorical", Collections.singleton("keyword"),
|
||||
"unsupported type; supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
|
||||
"unsupported type; supported types are " +
|
||||
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]"),
|
||||
FieldSelection.included("numeric", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
|
||||
);
|
||||
}
|
||||
|
@ -517,7 +519,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
|
||||
FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
|
||||
FieldSelection.excluded("your_keyword", Collections.singleton("keyword"), "unsupported type; supported types " +
|
||||
"are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]")
|
||||
"are [boolean, byte, double, float, half_float, integer, long, scaled_float, short, unsigned_long]")
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -225,7 +225,7 @@
|
|||
- match: { field_selection.2.is_included: false }
|
||||
- match: { field_selection.2.is_required: false }
|
||||
- is_false: field_selection.2.feature_type
|
||||
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
|
||||
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text, unsigned_long]" }
|
||||
- match: { field_selection.3.name: "field_4" }
|
||||
- match: { field_selection.3.mapping_types: ["text"] }
|
||||
- match: { field_selection.3.is_included: false }
|
||||
|
@ -299,7 +299,7 @@
|
|||
- match: { field_selection.2.is_included: false }
|
||||
- match: { field_selection.2.is_required: false }
|
||||
- is_false: field_selection.2.feature_type
|
||||
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
|
||||
- match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text, unsigned_long]" }
|
||||
- match: { field_selection.3.name: "field_4" }
|
||||
- match: { field_selection.3.mapping_types: ["text"] }
|
||||
- match: { field_selection.3.is_included: false }
|
||||
|
|
Loading…
Reference in New Issue