QL: handle IP type fields extraction with ignore_malformed property (#66622) (#66686)

Return null for any field that is present in the _ignored section of the
response, not only numerics and IPs.

(cherry picked from commit 106719f6af1eb2f06396161143a5f0185c5d1b54)
This commit is contained in:
Andrei Stefan 2020-12-21 16:07:03 +02:00 committed by GitHub
parent d4039228ae
commit 12ffe4a4b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 279 additions and 12 deletions

View File

@ -129,16 +129,13 @@ public abstract class AbstractFieldHitExtractor implements HitExtractor {
// if the field was ignored because it was malformed and ignore_malformed was turned on
if (fullFieldName != null
&& hit.getFields().containsKey(IgnoredFieldMapper.NAME)
&& isFromDocValuesOnly(dataType) == false
&& dataType.isNumeric()) {
&& isFromDocValuesOnly(dataType) == false) {
/*
* ignore_malformed makes sense for extraction from _source for numeric fields only.
* And we check here that the data type is actually a numeric one to rule out
* any non-numeric sub-fields (for which the "parent" field should actually be extracted from _source).
* We check here the presence of the field name (fullFieldName including the parent name) in the list
* of _ignored fields (due to malformed data, which was ignored).
* For example, in the case of a malformed number, a "byte" field with "ignore_malformed: true"
* with a "text" sub-field should return "null" for the "byte" parent field and the actual malformed
* data for the "text" sub-field. Also, the _ignored section of the response contains the full field
* name, thus the need to do the comparison with that and not only the field name.
* data for the "text" sub-field.
*/
if (hit.getFields().get(IgnoredFieldMapper.NAME).getValues().contains(fullFieldName)) {
return null;
@ -202,7 +199,7 @@ public abstract class AbstractFieldHitExtractor implements HitExtractor {
}
return result;
}
} else if (DataTypes.isString(dataType)) {
} else if (DataTypes.isString(dataType) || dataType == DataTypes.IP) {
return values.toString();
} else {
return values;

View File

@ -345,25 +345,155 @@ public abstract class FieldExtractorTestCase extends BaseRestSqlTestCase {
/*
* "ip_field": {
* "type": "ip"
* "type": "ip",
* "ignore_malformed": true/false
* }
*/
public void testIpField() throws IOException {
String query = "SELECT ip_field FROM test";
String ipField = "192.168.1.1";
String actualValue = ipField;
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreMalformed = randomBoolean();
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
createIndexWithFieldTypeAndProperties("ip", null, explicitSourceSetting ? indexProps : null);
index("{\"ip_field\":\"" + ipField + "\"}");
Map<String, Map<String, Object>> fieldProps = null;
if (ignoreMalformed) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
// on purpose use a non-IP and check for null when querying the field's value
fieldProp.put("ignore_malformed", true);
fieldProps.put("ip_field", fieldProp);
actualValue = "foo";
}
createIndexWithFieldTypeAndProperties("ip", fieldProps, explicitSourceSetting ? indexProps : null);
index("{\"ip_field\":\"" + actualValue + "\"}");
if (explicitSourceSetting == false || enableSource) {
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(columnInfo("plain", "ip_field", "ip", JDBCType.VARCHAR, Integer.MAX_VALUE)));
expected.put("rows", singletonList(singletonList(ipField)));
expected.put("rows", singletonList(singletonList(ignoreMalformed ? null : actualValue)));
assertResponse(expected, runSql(query));
} else {
expectSourceDisabledError(query);
}
}
/*
* "geo_point_field": {
* "type": "geo_point",
* "ignore_malformed": true/false
* }
*/
public void testGeoPointField() throws IOException {
String query = "SELECT geo_point_field FROM test";
String geoPointField = "41.12,-71.34";
String geoPointFromDocValues = "POINT (-71.34000004269183 41.1199999647215)";
String actualValue = geoPointField;
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreMalformed = randomBoolean();
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> fieldProps = null;
if (ignoreMalformed) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
// on purpose use a non-geo-point and check for null when querying the field's value
fieldProp.put("ignore_malformed", true);
fieldProps.put("geo_point_field", fieldProp);
actualValue = "foo";
}
createIndexWithFieldTypeAndProperties("geo_point", fieldProps, explicitSourceSetting ? indexProps : null);
index("{\"geo_point_field\":\"" + actualValue + "\"}");
// the values come from docvalues (vs from _source) so disabling the source doesn't have any impact on the values returned
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(columnInfo("plain", "geo_point_field", "geo_point", JDBCType.VARCHAR, Integer.MAX_VALUE)));
expected.put("rows", singletonList(singletonList(ignoreMalformed ? null : geoPointFromDocValues)));
assertResponse(expected, runSql(query));
}
/*
* "geo_shape_field": {
* "type": "point",
* "ignore_malformed": true/false
* }
*/
public void testGeoShapeField() throws IOException {
String query = "SELECT geo_shape_field FROM test";
String actualValue = "[-77.03653, 38.897676]";
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreMalformed = randomBoolean();
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> fieldProps = null;
if (ignoreMalformed) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
// on purpose use a non-geo-shape and check for null when querying the field's value
fieldProp.put("ignore_malformed", true);
fieldProps.put("geo_shape_field", fieldProp);
actualValue = "\"foo\"";
}
createIndexWithFieldTypeAndProperties("geo_shape", fieldProps, explicitSourceSetting ? indexProps : null);
index("{\"geo_shape_field\":{\"type\":\"point\",\"coordinates\":" + actualValue + "}}");
if (explicitSourceSetting == false || enableSource) {
Map<String, Object> expected = new HashMap<>();
expected.put(
"columns",
Arrays.asList(columnInfo("plain", "geo_shape_field", "geo_shape", JDBCType.VARCHAR, Integer.MAX_VALUE))
);
expected.put("rows", singletonList(singletonList(ignoreMalformed ? null : "POINT (-77.03653 38.897676)")));
assertResponse(expected, runSql(query));
} else {
expectSourceDisabledError(query);
}
}
/*
* "shape_field": {
* "type": "shape",
* "ignore_malformed": true/false
* }
*/
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/66678")
public void testShapeField() throws IOException {
String query = "SELECT shape_field FROM test";
String shapeField = "POINT (-377.03653 389.897676)";
String actualValue = shapeField;
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreMalformed = randomBoolean();
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> fieldProps = null;
if (ignoreMalformed) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
// on purpose use a non-geo-point and check for null when querying the field's value
fieldProp.put("ignore_malformed", true);
fieldProps.put("shape_field", fieldProp);
actualValue = "foo";
}
createIndexWithFieldTypeAndProperties("shape", fieldProps, explicitSourceSetting ? indexProps : null);
index("{\"shape_field\":\"" + actualValue + "\"}");
if (explicitSourceSetting == false || enableSource) {
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(columnInfo("plain", "shape_field", "shape", JDBCType.VARCHAR, Integer.MAX_VALUE)));
expected.put("rows", singletonList(singletonList(ignoreMalformed ? null : "POINT (-377.03653 389.897676)")));
assertResponse(expected, runSql(query));
} else {
expectSourceDisabledError(query);
@ -584,6 +714,65 @@ public abstract class FieldExtractorTestCase extends BaseRestSqlTestCase {
}
}
/*
* "text_field": {
* "type": "text",
* "fields": {
* "ip_subfield": {
* "type": "ip",
* "ignore_malformed": true/false
* }
* }
* }
*/
public void testTextFieldWithIpSubfield() throws IOException {
String ip = "123.123.123.123";
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreMalformed = randomBoolean(); // ignore_malformed is true, thus test a non-IP value
String actualValue = ip;
String fieldName = "text_field";
String subFieldName = "text_field.ip_subfield";
String query = "SELECT " + fieldName + "," + subFieldName + " FROM test";
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> subFieldsProps = null;
if (ignoreMalformed) {
subFieldsProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
// on purpose use a non-IP value instead of an IP and check for null when querying the field's value
fieldProp.put("ignore_malformed", true);
subFieldsProps.put(subFieldName, fieldProp);
actualValue = "foo";
}
createIndexWithFieldTypeAndSubFields("text", null, explicitSourceSetting ? indexProps : null, subFieldsProps, "ip");
index("{\"" + fieldName + "\":\"" + actualValue + "\"}");
if (explicitSourceSetting == false || enableSource) {
Map<String, Object> expected = new HashMap<>();
expected.put(
"columns",
Arrays.asList(
columnInfo("plain", fieldName, "text", JDBCType.VARCHAR, Integer.MAX_VALUE),
columnInfo("plain", subFieldName, "ip", JDBCType.VARCHAR, Integer.MAX_VALUE)
)
);
if (ignoreMalformed) {
expected.put("rows", singletonList(Arrays.asList("foo", null)));
} else {
expected.put("rows", singletonList(Arrays.asList(ip, ip)));
}
assertResponse(expected, runSql(query));
} else {
expectSourceDisabledError(query);
// if the _source is disabled, selecting only the ip sub-field shouldn't work as well
expectSourceDisabledError("SELECT " + subFieldName + " FROM test");
}
}
/*
* "integer_field": {
* "type": "integer",
@ -663,6 +852,85 @@ public abstract class FieldExtractorTestCase extends BaseRestSqlTestCase {
}
}
/*
* "ip_field": {
* "type": "ip",
* "ignore_malformed": true/false,
* "fields": {
* "keyword_subfield/text_subfield": {
* "type": "keyword/text"
* }
* }
* }
*/
public void testIpFieldWithTextOrKeywordSubfield() throws IOException {
String ip = "123.123.123.123";
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreMalformed = randomBoolean(); // ignore_malformed is true, thus test a non-number value
boolean isKeyword = randomBoolean(); // text or keyword subfield
String actualValue = ip;
String fieldName = "ip_field";
String subFieldName = "ip_field." + (isKeyword ? "keyword_subfield" : "text_subfield");
String query = "SELECT " + fieldName + "," + subFieldName + " FROM test";
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> fieldProps = null;
if (ignoreMalformed) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
// on purpose use a non-IP instead of an ip and check for null when querying the field's value
fieldProp.put("ignore_malformed", true);
fieldProps.put(fieldName, fieldProp);
actualValue = "foo";
}
createIndexWithFieldTypeAndSubFields(
"ip",
fieldProps,
explicitSourceSetting ? indexProps : null,
null,
isKeyword ? "keyword" : "text"
);
index("{\"" + fieldName + "\":\"" + actualValue + "\"}");
if (explicitSourceSetting == false || enableSource) {
Map<String, Object> expected = new HashMap<>();
expected.put(
"columns",
Arrays.asList(
columnInfo("plain", fieldName, "ip", JDBCType.VARCHAR, Integer.MAX_VALUE),
columnInfo("plain", subFieldName, isKeyword ? "keyword" : "text", JDBCType.VARCHAR, Integer.MAX_VALUE)
)
);
if (ignoreMalformed) {
expected.put("rows", singletonList(Arrays.asList(null, "foo")));
} else {
expected.put("rows", singletonList(Arrays.asList(ip, ip)));
}
assertResponse(expected, runSql(query));
} else {
if (isKeyword) {
// selecting only the keyword subfield when the _source is disabled should work
Map<String, Object> expected = new HashMap<>();
expected.put("columns", singletonList(columnInfo("plain", subFieldName, "keyword", JDBCType.VARCHAR, Integer.MAX_VALUE)));
if (ignoreMalformed) {
expected.put("rows", singletonList(singletonList("foo")));
} else {
expected.put("rows", singletonList(singletonList(ip)));
}
assertResponse(expected, runSql("SELECT ip_field.keyword_subfield FROM test"));
} else {
expectSourceDisabledError(query);
}
// if the _source is disabled, selecting only the ip field shouldn't work
expectSourceDisabledError("SELECT " + fieldName + " FROM test");
}
}
/*
* "integer_field": {
* "type": "integer",
@ -946,6 +1214,8 @@ public abstract class FieldExtractorTestCase extends BaseRestSqlTestCase {
return JDBCType.FLOAT;
case "scaled_float":
return JDBCType.DOUBLE;
case "ip":
return JDBCType.VARCHAR;
default:
throw new AssertionError("Illegal value [" + esType + "] for data type");
}