[7.x][ML] Avoid assertion error on empty string feature values for inference (#58541) (#58550)

It is possible for the source document to have an empty string value
for a field that is mapped as numeric. We should treat those as missing
values and avoid throwing an assertion error.

Backport of #58541
This commit is contained in:
Dimitris Athanasiou 2020-06-25 18:07:29 +03:00 committed by GitHub
parent 5af7071db0
commit c3dfafe0b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 15 deletions

View File

@ -96,16 +96,23 @@ public final class InferenceHelpers {
return ((Number)value).doubleValue(); return ((Number)value).doubleValue();
} }
if (value instanceof String) { if (value instanceof String) {
try { return stringToDouble((String) value);
return Double.valueOf((String)value);
} catch (NumberFormatException nfe) {
assert false : "value is not properly formatted double [" + value + "]";
return null;
}
} }
return null; return null;
} }
/**
 * Converts the textual form of a feature value into a {@link Double}.
 *
 * An empty string is treated as a missing value: it yields {@code null}
 * without any parse attempt. Every other string is passed to
 * {@link Double#valueOf(String)}. If that rejects the text, an assertion
 * failure is raised when assertions are enabled; with assertions disabled
 * the method returns {@code null} instead.
 *
 * @param value the text to convert; must not be {@code null}
 * @return the parsed number, or {@code null} when the string is empty or
 *         cannot be parsed
 */
private static Double stringToDouble(String value) {
    // Empty input means "no value", not "malformed value", so skip the parser.
    if (value.length() == 0) {
        return null;
    }
    Double parsed = null;
    try {
        parsed = Double.valueOf(value);
    } catch (NumberFormatException nfe) {
        // Fail loudly under -ea; otherwise fall through and report null.
        assert false : "value is not properly formatted double [" + value + "]";
    }
    return parsed;
}
public static Map<String, double[]> decodeFeatureImportances(Map<String, String> processedFeatureToOriginalFeatureMap, public static Map<String, double[]> decodeFeatureImportances(Map<String, String> processedFeatureToOriginalFeatureMap,
Map<String, double[]> featureImportances) { Map<String, double[]> featureImportances) {
if (processedFeatureToOriginalFeatureMap == null || processedFeatureToOriginalFeatureMap.isEmpty()) { if (processedFeatureToOriginalFeatureMap == null || processedFeatureToOriginalFeatureMap.isEmpty()) {

View File

@ -17,18 +17,18 @@ import static org.hamcrest.Matchers.nullValue;
public class InferenceHelpersTests extends ESTestCase { public class InferenceHelpersTests extends ESTestCase {
public void testToDoubleFromNumbers() { public void testToDoubleFromNumbers() {
assertThat(0.5, equalTo(InferenceHelpers.toDouble(0.5))); assertThat(InferenceHelpers.toDouble(0.5), equalTo(0.5));
assertThat(0.5, equalTo(InferenceHelpers.toDouble(0.5))); assertThat(InferenceHelpers.toDouble(5L), equalTo(5.0));
assertThat(5.0, equalTo(InferenceHelpers.toDouble(5L))); assertThat(InferenceHelpers.toDouble(5), equalTo(5.0));
assertThat(5.0, equalTo(InferenceHelpers.toDouble(5))); assertThat(InferenceHelpers.toDouble(0.5f), equalTo(0.5));
assertThat(0.5, equalTo(InferenceHelpers.toDouble(0.5f)));
} }
public void testToDoubleFromString() { public void testToDoubleFromString() {
assertThat(0.5, equalTo(InferenceHelpers.toDouble("0.5"))); assertThat(InferenceHelpers.toDouble(""), is(nullValue()));
assertThat(-0.5, equalTo(InferenceHelpers.toDouble("-0.5"))); assertThat(InferenceHelpers.toDouble("0.5"), equalTo(0.5));
assertThat(5.0, equalTo(InferenceHelpers.toDouble("5"))); assertThat(InferenceHelpers.toDouble("-0.5"), equalTo(-0.5));
assertThat(-5.0, equalTo(InferenceHelpers.toDouble("-5"))); assertThat(InferenceHelpers.toDouble("5"), equalTo(5.0));
assertThat(InferenceHelpers.toDouble("-5"), equalTo(-5.0));
// if ae are turned off, then we should get a null value // if ae are turned off, then we should get a null value
// otherwise, we should expect an assertion failure telling us that the string is improperly formatted // otherwise, we should expect an assertion failure telling us that the string is improperly formatted