Switch from using docvalue_fields to extracting values from _source (#44062) (#44804)

* Switch from using docvalue_fields to extracting values from _source
where applicable. Doing this means parsing the _source and parsing
numbers the same way Elasticsearch does when indexing a document (a
minimal illustration of this coercion follows below).
* This also introduces a minor limitation: alias fields that are NOT
part of a tree of sub-fields can no longer be retrieved. The field_caps
API gives no indication of whether a field is an alias or not, and at
_source parsing time there is no way to know whether a root field is an
alias. Fields of the form "a.b.c.alias" can be extracted from
docvalue_fields, but only if the field they point to can be extracted
from docvalue_fields. Also, not every field in a hierarchy of fields
can be determined to be an alias.
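As a minimal, hypothetical sketch (not part of this commit) of the coercion described in the first point: the extractor below relies on the same NumberFieldMapper.NumberType machinery Elasticsearch uses at index time; the class name and main method here are made up for the example.

import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;

public class SourceCoercionSketch {
    public static void main(String[] args) {
        // With coerce enabled (second argument), a floating point string sent to an
        // "integer" field is truncated to an integer, exactly as at index time...
        Number asInteger = NumberType.INTEGER.parse("123.456", true); // -> 123
        // ...while a "double" field keeps the fraction.
        Number asDouble = NumberType.DOUBLE.parse("123.456", true);   // -> 123.456
        System.out.println(asInteger + " / " + asDouble);
    }
}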

(cherry picked from commit 8bf8a055e38f00df5f49c8d97f632f69d6e00c2c)
Andrei Stefan 2019-07-25 10:02:41 +03:00 committed by GitHub
parent f9943a3e53
commit 2633d11eb7
23 changed files with 1118 additions and 71 deletions

View File

@ -24,9 +24,6 @@ Which returns:
{
"size" : 10,
"docvalue_fields" : [
{
"field": "page_count"
},
{
"field": "release_date",
"format": "epoch_millis"
@ -35,7 +32,8 @@ Which returns:
"_source": {
"includes": [
"author",
"name"
"name",
"page_count"
],
"excludes": []
},

View File

@ -161,3 +161,21 @@ By default, `geo_points` fields are indexed and have doc values. However, only latitude and longitude are stored and
indexed with some loss of precision from the original values (4.190951585769653E-8 for the latitude and
8.381903171539307E-8 for the longitude). The altitude component is accepted but not stored in doc values nor indexed.
Therefore, calling the `ST_Z` function in filtering, grouping or sorting will return `null`.
[float]
[[fields-from-source]]
=== Retrieving from `_source`
Most of {es-sql}'s columns are retrieved from the document's `_source`; no attempt is made to read the column content from
`docvalue_fields`, not even when the <<mapping-source-field,`_source`>> field is explicitly disabled in the mapping.
If a query asks for a column for which no source is stored, {es-sql} will not return it. The field types exempt from
this restriction are `keyword`, `date`, `scaled_float`, `geo_point` and `geo_shape`, since they are NOT returned from `_source` but
from `docvalue_fields`.
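As an illustration of this restriction, a hedged sketch using the same low-level REST client `Request` helper the tests in this commit rely on; the index name, field names and endpoint path are assumptions made for the example, not documented additions.

import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.RestClient;

public class SourceDisabledSketch {
    static void run(RestClient client) throws Exception {
        // Hypothetical index with _source disabled and one text plus one keyword field.
        Request createIndex = new Request("PUT", "/example");
        createIndex.setJsonEntity("{\"mappings\":{\"_source\":{\"enabled\":false},"
            + "\"properties\":{\"text_field\":{\"type\":\"text\"},\"keyword_field\":{\"type\":\"keyword\"}}}}");
        client.performRequest(createIndex);

        // "SELECT text_field FROM example" would now fail (text needs _source),
        // while the keyword column below still works because it is read from docvalue_fields.
        Request sql = new Request("POST", "/_sql");
        sql.setEntity(new StringEntity("{\"query\":\"SELECT keyword_field FROM example\"}",
            ContentType.APPLICATION_JSON));
        client.performRequest(sql);
    }
}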
[float]
[[fields-from-docvalues]]
=== Retrieving from `docvalue_fields`
When the number of columns retrievable from `docvalue_fields` is greater than the configured <<dynamic-index-settings,`index.max_docvalue_fields_search` setting>>,
the query will fail with an `IllegalArgumentException: Trying to retrieve too many docvalue_fields` error. Either the mentioned {es}
setting needs to be increased or fewer columns retrievable from `docvalue_fields` need to be selected.
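If the limit does need to be raised, a minimal sketch (not part of this commit) of updating the dynamic index setting through the low-level REST client; the index name and the new limit value are placeholders.

import org.elasticsearch.client.Request;
import org.elasticsearch.client.RestClient;

public class RaiseDocvalueFieldsLimit {
    static void raise(RestClient client) throws Exception {
        // index.max_docvalue_fields_search is a dynamic setting, so it can be updated in place.
        Request settings = new Request("PUT", "/test/_settings");
        settings.setJsonEntity("{\"index\":{\"max_docvalue_fields_search\":200}}");
        client.performRequest(settings);
    }
}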

View File

@ -150,7 +150,8 @@ public class NotEqualMessageBuilder {
field(field, "same [" + expected + "]");
return;
}
field(field, "expected [" + expected + "] but was [" + actual + "]");
field(field, "expected " + expected.getClass().getSimpleName() + " [" + expected + "] but was "
+ actual.getClass().getSimpleName() + " [" + actual + "]");
}
private void indent() {

View File

@ -122,7 +122,7 @@ public class ElasticsearchAssertionsTests extends ESTestCase {
AssertionError error = expectThrows(AssertionError.class,
() -> assertToXContentEquivalent(BytesReference.bytes(builder), BytesReference.bytes(otherBuilder),
builder.contentType()));
assertThat(error.getMessage(), containsString("f2: expected [value2] but was [differentValue2]"));
assertThat(error.getMessage(), containsString("f2: expected String [value2] but was String [differentValue2]"));
}
{
XContentBuilder builder = JsonXContent.contentBuilder();
@ -155,7 +155,7 @@ public class ElasticsearchAssertionsTests extends ESTestCase {
AssertionError error = expectThrows(AssertionError.class,
() -> assertToXContentEquivalent(BytesReference.bytes(builder), BytesReference.bytes(otherBuilder),
builder.contentType()));
assertThat(error.getMessage(), containsString("2: expected [three] but was [four]"));
assertThat(error.getMessage(), containsString("2: expected String [three] but was String [four]"));
}
{
XContentBuilder builder = JsonXContent.contentBuilder();

View File

@ -30,7 +30,7 @@ public class MlWithSecurityInsufficientRoleIT extends MlWithSecurityIT {
} catch (AssertionError ae) {
// Some tests assert on searches of wildcarded ML indices rather than on ML endpoints. For these we expect no hits.
if (ae.getMessage().contains("hits.total didn't match expected value")) {
assertThat(ae.getMessage(), containsString("but was [0]"));
assertThat(ae.getMessage(), containsString("but was Integer [0]"));
} else {
assertThat(ae.getMessage(),
either(containsString("action [cluster:monitor/xpack/ml")).or(containsString("action [cluster:admin/xpack/ml")));

View File

@ -96,15 +96,11 @@ public class CliExplainIT extends CliIntegrationTestCase {
assertThat(readLine(), startsWith(" },"));
assertThat(readLine(), startsWith(" \"_source\" : {"));
assertThat(readLine(), startsWith(" \"includes\" : ["));
assertThat(readLine(), startsWith(" \"i\""));
assertThat(readLine(), startsWith(" \"test_field\""));
assertThat(readLine(), startsWith(" ],"));
assertThat(readLine(), startsWith(" \"excludes\" : [ ]"));
assertThat(readLine(), startsWith(" },"));
assertThat(readLine(), startsWith(" \"docvalue_fields\" : ["));
assertThat(readLine(), startsWith(" {"));
assertThat(readLine(), startsWith(" \"field\" : \"i\""));
assertThat(readLine(), startsWith(" }"));
assertThat(readLine(), startsWith(" ],"));
assertThat(readLine(), startsWith(" \"sort\" : ["));
assertThat(readLine(), startsWith(" {"));
assertThat(readLine(), startsWith(" \"_doc\" :"));

View File

@ -0,0 +1,13 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.qa.single_node;
import org.elasticsearch.xpack.sql.qa.FieldExtractorTestCase;
public class FieldExtractorIT extends FieldExtractorTestCase {
}

View File

@ -0,0 +1,853 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.qa;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.elasticsearch.xpack.sql.qa.rest.RestSqlTestCase;
import java.io.IOException;
import java.io.InputStream;
import java.sql.JDBCType;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import static java.util.Collections.singletonList;
import static org.elasticsearch.xpack.sql.qa.rest.RestSqlTestCase.assertResponse;
import static org.elasticsearch.xpack.sql.qa.rest.RestSqlTestCase.columnInfo;
import static org.elasticsearch.xpack.sql.qa.rest.RestSqlTestCase.expectBadRequest;
import static org.hamcrest.Matchers.containsString;
/**
* Test class covering parameters/settings that can be used in the mapping of an index
* and which can affect the outcome of _source extraction and parsing when retrieving
* values from Elasticsearch.
*/
public abstract class FieldExtractorTestCase extends ESRestTestCase {
/*
* "text_field": {
* "text": "keyword"
* }
*/
public void testTextField() throws IOException {
String query = "SELECT text_field FROM test";
String text = randomAlphaOfLength(20);
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
createIndexWithFieldTypeAndProperties("text", null, explicitSourceSetting ? indexProps : null);
index("{\"text_field\":\"" + text + "\"}");
if (explicitSourceSetting == false || enableSource == true) {
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", "text_field", "text", JDBCType.VARCHAR, Integer.MAX_VALUE)
));
expected.put("rows", singletonList(singletonList(text)));
assertResponse(expected, runSql(query));
} else {
expectSourceDisabledError(query);
}
}
/*
* "keyword_field": {
* "type": "keyword",
* "ignore_above": 10
* }
*/
public void testKeywordField() throws IOException {
String keyword = randomAlphaOfLength(20);
// _source for `keyword` fields doesn't matter, as they should be taken from docvalue_fields
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreAbove = randomBoolean();
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> fieldProps = null;
if (ignoreAbove) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
fieldProp.put("ignore_above", 10);
fieldProps.put("keyword_field", fieldProp);
}
createIndexWithFieldTypeAndProperties("keyword", fieldProps, explicitSourceSetting ? indexProps : null);
index("{\"keyword_field\":\"" + keyword + "\"}");
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", "keyword_field", "keyword", JDBCType.VARCHAR, Integer.MAX_VALUE)
));
expected.put("rows", singletonList(singletonList(ignoreAbove ? null : keyword)));
assertResponse(expected, runSql("SELECT keyword_field FROM test"));
}
/*
* "long/integer/short/byte_field": {
* "type": "long/integer/short/byte"
* }
*/
public void testFractionsForNonFloatingPointTypes() throws IOException {
String floatingPointNumber = "123.456";
String fieldType = randomFrom("long", "integer", "short", "byte");
createIndexWithFieldTypeAndProperties(fieldType, null, null);
index("{\"" + fieldType + "_field\":\"" + floatingPointNumber + "\"}");
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", fieldType + "_field", fieldType, jdbcTypeFor(fieldType), Integer.MAX_VALUE)
));
// because "coerce" is true, a "123.456" floating point number STRING should be converted to 123, no matter the numeric field type
expected.put("rows", singletonList(singletonList(123)));
assertResponse(expected, runSql("SELECT " + fieldType + "_field FROM test"));
}
/*
* "double/float/half_float/scaled_float_field": {
* "type": "double/float/half_float/scaled_float",
* "scaling_factor": 10 (for scaled_float type only)
* }
*/
public void testCoerceForFloatingPointTypes() throws IOException {
String floatingPointNumber = "123.456";
String fieldType = randomFrom("double", "float", "half_float", "scaled_float");
boolean isScaledFloat = "scaled_float".equals(fieldType);
Map<String, Map<String, Object>> fieldProps = null;
if (isScaledFloat) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
fieldProp.put("scaling_factor", 10); // scaling_factor is required for "scaled_float"
fieldProps.put(fieldType + "_field", fieldProp);
}
createIndexWithFieldTypeAndProperties(fieldType, fieldProps, null);
// important here is to pass floatingPointNumber as a string: "float_field": "123.456"
index("{\"" + fieldType + "_field\":\"" + floatingPointNumber + "\"}");
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", fieldType + "_field", fieldType, jdbcTypeFor(fieldType), Integer.MAX_VALUE)
));
// because "coerce" is true, a "123.456" floating point number STRING should be converted to 123.456 as number
// and converted to 123.5 for "scaled_float" type
expected.put("rows", singletonList(singletonList(
isScaledFloat ? 123.5 : ("double".equals(fieldType) == false ? Double.valueOf(123.456f) : Double.valueOf(floatingPointNumber)))));
assertResponse(expected, runSql("SELECT " + fieldType + "_field FROM test"));
}
/*
* "long_field": {
* "type": "long",
* "ignore_malformed": true/false
* }
*/
public void testLongFieldType() throws IOException {
testField("long", randomLong());
}
/*
* "integer_field": {
* "type": "integer",
* "ignore_malformed": true/false
* }
*/
public void testIntegerFieldType() throws IOException {
testField("integer", randomInt());
}
/*
* "short_field": {
* "type": "short",
* "ignore_malformed": true/false
* }
*/
public void testShortFieldType() throws IOException {
// Use an Integer because the json parser that reads the values from the response will create
// Integers for short and byte values
testField("short", ((Number) randomShort()).intValue());
}
/*
* "byte_field": {
* "type": "byte",
* "ignore_malformed": true/false
* }
*/
public void testByteFieldType() throws IOException {
// Use an Integer because the json parser that reads the values from the response will create
// Integers for short and byte values
testField("byte", ((Number) randomByte()).intValue());
}
private void testField(String fieldType, Object value) throws IOException {
String fieldName = fieldType + "_field";
String query = "SELECT " + fieldName + " FROM test";
Object actualValue = value;
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreMalformed = randomBoolean(); // ignore_malformed is true, thus test a non-number value
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> fieldProps = null;
if (ignoreMalformed) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
// on purpose use a string instead of a number and check for null when querying the field's value
fieldProp.put("ignore_malformed", true);
fieldProps.put(fieldName, fieldProp);
actualValue = "\"foo\"";
}
createIndexWithFieldTypeAndProperties(fieldType, fieldProps, explicitSourceSetting ? indexProps : null);
index("{\"" + fieldName + "\":" + actualValue + "}");
if (explicitSourceSetting == false || enableSource == true) {
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", fieldName, fieldType, jdbcTypeFor(fieldType), Integer.MAX_VALUE)
));
expected.put("rows", singletonList(singletonList(ignoreMalformed ? null : actualValue)));
assertResponse(expected, runSql(query));
} else {
expectSourceDisabledError(query);
}
}
/*
* "boolean_field": {
* "type": "boolean"
* }
*/
public void testBooleanField() throws IOException {
String query = "SELECT boolean_field FROM test";
boolean booleanField = randomBoolean();
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean asString = randomBoolean(); // pass true or false as the string "true" or "false"
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
createIndexWithFieldTypeAndProperties("boolean", null, explicitSourceSetting ? indexProps : null);
if (asString) {
index("{\"boolean_field\":\"" + booleanField + "\"}");
} else {
index("{\"boolean_field\":" + booleanField + "}");
}
if (explicitSourceSetting == false || enableSource == true) {
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", "boolean_field", "boolean", JDBCType.BOOLEAN, Integer.MAX_VALUE)
));
// adding the boolean as a String here because parsing the response will yield a "true"/"false" String
expected.put("rows", singletonList(singletonList(asString ? String.valueOf(booleanField) : booleanField)));
assertResponse(expected, runSql(query));
} else {
expectSourceDisabledError(query);
}
}
/*
* "ip_field": {
* "type": "ip"
* }
*/
public void testIpField() throws IOException {
String query = "SELECT ip_field FROM test";
String ipField = "192.168.1.1";
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
createIndexWithFieldTypeAndProperties("ip", null, explicitSourceSetting ? indexProps : null);
index("{\"ip_field\":\"" + ipField + "\"}");
if (explicitSourceSetting == false || enableSource == true) {
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", "ip_field", "ip", JDBCType.VARCHAR, Integer.MAX_VALUE)
));
expected.put("rows", singletonList(singletonList(ipField)));
assertResponse(expected, runSql(query));
} else {
expectSourceDisabledError(query);
}
}
/*
* "keyword_field": {
* "type": "keyword"
* },
* "keyword_field_alias": {
* "type": "alias",
* "path": "keyword_field"
* },
* "a.b.c.keyword_field_alias": {
* "type": "alias",
* "path": "keyword_field"
* }
*/
public void testAliasFromDocValueField() throws IOException {
String keyword = randomAlphaOfLength(20);
createIndexWithFieldTypeAndAlias("keyword", null, null);
index("{\"keyword_field\":\"" + keyword + "\"}");
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", "keyword_field", "keyword", JDBCType.VARCHAR, Integer.MAX_VALUE),
columnInfo("plain", "keyword_field_alias", "keyword", JDBCType.VARCHAR, Integer.MAX_VALUE),
columnInfo("plain", "a.b.c.keyword_field_alias", "keyword", JDBCType.VARCHAR, Integer.MAX_VALUE)
));
expected.put("rows", singletonList(Arrays.asList(keyword, keyword, keyword)));
assertResponse(expected, runSql("SELECT keyword_field, keyword_field_alias, a.b.c.keyword_field_alias FROM test"));
}
/*
* "text_field": {
* "type": "text"
* },
* "text_field_alias": {
* "type": "alias",
* "path": "text_field"
* },
* "a.b.c.text_field_alias": {
* "type": "alias",
* "path": "text_field"
* }
*/
public void testAliasFromSourceField() throws IOException {
String text = randomAlphaOfLength(20);
createIndexWithFieldTypeAndAlias("text", null, null);
index("{\"text_field\":\"" + text + "\"}");
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", "text_field", "text", JDBCType.VARCHAR, Integer.MAX_VALUE),
columnInfo("plain", "text_field_alias", "text", JDBCType.VARCHAR, Integer.MAX_VALUE),
columnInfo("plain", "a.b.c.text_field_alias", "text", JDBCType.VARCHAR, Integer.MAX_VALUE)
));
expected.put("rows", singletonList(Arrays.asList(text, null, null)));
assertResponse(expected, runSql("SELECT text_field, text_field_alias, a.b.c.text_field_alias FROM test"));
}
/*
* "integer_field": {
* "type": "integer"
* },
* "integer_field_alias": {
* "type": "alias",
* "path": "integer_field"
* },
* "a.b.c.integer_field_alias": {
* "type": "alias",
* "path": "integer_field"
* }
*/
public void testAliasAggregatableFromSourceField() throws IOException {
int number = randomInt();
createIndexWithFieldTypeAndAlias("integer", null, null);
index("{\"integer_field\":" + number + "}");
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", "integer_field", "integer", JDBCType.INTEGER, Integer.MAX_VALUE),
columnInfo("plain", "integer_field_alias", "integer", JDBCType.INTEGER, Integer.MAX_VALUE),
columnInfo("plain", "a.b.c.integer_field_alias", "integer", JDBCType.INTEGER, Integer.MAX_VALUE)
));
expected.put("rows", singletonList(Arrays.asList(number, null, number)));
assertResponse(expected, runSql("SELECT integer_field, integer_field_alias, a.b.c.integer_field_alias FROM test"));
}
/*
* "text_field": {
* "type": "text",
* "fields": {
* "keyword_subfield": {
* "type": "keyword",
* "ignore_above": 10
* }
* }
* }
*/
public void testTextFieldWithKeywordSubfield() throws IOException {
String text = randomAlphaOfLength(10) + " " + randomAlphaOfLength(10);
// _source for `keyword` fields doesn't matter, as they should be taken from docvalue_fields
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreAbove = randomBoolean();
String fieldName = "text_field";
String subFieldName = "text_field.keyword_subfield";
String query = "SELECT " + fieldName + "," + subFieldName + " FROM test";
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> subFieldsProps = null;
if (ignoreAbove) {
subFieldsProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
fieldProp.put("ignore_above", 10);
subFieldsProps.put(subFieldName, fieldProp);
}
createIndexWithFieldTypeAndSubFields("text", null, explicitSourceSetting ? indexProps : null, subFieldsProps, "keyword");
index("{\"" + fieldName + "\":\"" + text + "\"}");
if (explicitSourceSetting == false || enableSource == true) {
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", fieldName, "text", JDBCType.VARCHAR, Integer.MAX_VALUE),
columnInfo("plain", subFieldName, "keyword", JDBCType.VARCHAR, Integer.MAX_VALUE)
));
expected.put("rows", singletonList(Arrays.asList(text, ignoreAbove ? null : text)));
assertResponse(expected, runSql(query));
} else {
expectSourceDisabledError(query);
// even if the _source is disabled, selecting only the keyword sub-field should work as expected
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", subFieldName, "keyword", JDBCType.VARCHAR, Integer.MAX_VALUE)
));
expected.put("rows", singletonList(singletonList(ignoreAbove ? null : text)));
assertResponse(expected, runSql("SELECT text_field.keyword_subfield FROM test"));
}
}
/*
* "text_field": {
* "type": "text",
* "fields": {
* "integer_subfield": {
* "type": "integer",
* "ignore_malformed": true/false
* }
* }
* }
*/
public void testTextFieldWithIntegerNumberSubfield() throws IOException {
Integer number = randomInt();
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreMalformed = randomBoolean(); // ignore_malformed is true, thus test a non-number value
Object actualValue = number;
String fieldName = "text_field";
String subFieldName = "text_field.integer_subfield";
String query = "SELECT " + fieldName + "," + subFieldName +" FROM test";
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> subFieldsProps = null;
if (ignoreMalformed) {
subFieldsProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
// on purpose use a string instead of a number and check for null when querying the field's value
fieldProp.put("ignore_malformed", true);
subFieldsProps.put(subFieldName, fieldProp);
actualValue = "foo";
}
createIndexWithFieldTypeAndSubFields("text", null, explicitSourceSetting ? indexProps : null, subFieldsProps, "integer");
index("{\"" + fieldName + "\":\"" + actualValue + "\"}");
if (explicitSourceSetting == false || enableSource == true) {
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", fieldName, "text", JDBCType.VARCHAR, Integer.MAX_VALUE),
columnInfo("plain", subFieldName, "integer", JDBCType.INTEGER, Integer.MAX_VALUE)
));
if (ignoreMalformed) {
expected.put("rows", singletonList(Arrays.asList("foo", null)));
} else {
expected.put("rows", singletonList(Arrays.asList(String.valueOf(number), number)));
}
assertResponse(expected, runSql(query));
} else {
expectSourceDisabledError(query);
// if the _source is disabled, selecting only the integer sub-field shouldn't work either
expectSourceDisabledError("SELECT " + subFieldName + " FROM test");
}
}
/*
* "integer_field": {
* "type": "integer",
* "ignore_malformed": true/false,
* "fields": {
* "keyword_subfield/text_subfield": {
* "type": "keyword/text"
* }
* }
* }
*/
public void testNumberFieldWithTextOrKeywordSubfield() throws IOException {
Integer number = randomInt();
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean ignoreMalformed = randomBoolean(); // ignore_malformed is true, thus test a non-number value
boolean isKeyword = randomBoolean(); // text or keyword subfield
Object actualValue = number;
String fieldName = "integer_field";
String subFieldName = "integer_field." + (isKeyword ? "keyword_subfield" : "text_subfield");
String query = "SELECT " + fieldName + "," + subFieldName +" FROM test";
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> fieldProps = null;
if (ignoreMalformed) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
// on purpose use a string instead of a number and check for null when querying the field's value
fieldProp.put("ignore_malformed", true);
fieldProps.put(fieldName, fieldProp);
actualValue = "foo";
}
createIndexWithFieldTypeAndSubFields("integer", fieldProps, explicitSourceSetting ? indexProps : null, null,
isKeyword ? "keyword" : "text");
index("{\"" + fieldName + "\":\"" + actualValue + "\"}");
if (explicitSourceSetting == false || enableSource == true) {
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", fieldName, "integer", JDBCType.INTEGER, Integer.MAX_VALUE),
columnInfo("plain", subFieldName, isKeyword ? "keyword" : "text", JDBCType.VARCHAR, Integer.MAX_VALUE)
));
if (ignoreMalformed) {
expected.put("rows", singletonList(Arrays.asList(null, "foo")));
} else {
expected.put("rows", singletonList(Arrays.asList(number, String.valueOf(number))));
}
assertResponse(expected, runSql(query));
} else {
if (isKeyword) {
// selecting only the keyword subfield when the _source is disabled should work
Map<String, Object> expected = new HashMap<>();
expected.put("columns", singletonList(columnInfo("plain", subFieldName, "keyword", JDBCType.VARCHAR, Integer.MAX_VALUE)));
if (ignoreMalformed) {
expected.put("rows", singletonList(singletonList("foo")));
} else {
expected.put("rows", singletonList(singletonList(String.valueOf(number))));
}
assertResponse(expected, runSql("SELECT integer_field.keyword_subfield FROM test"));
} else {
expectSourceDisabledError(query);
}
// if the _source is disabled, selecting only the integer field shouldn't work
expectSourceDisabledError("SELECT " + fieldName + " FROM test");
}
}
/*
* "integer_field": {
* "type": "integer",
* "ignore_malformed": true/false,
* "fields": {
* "byte_subfield": {
* "type": "byte",
* "ignore_malformed": true/false
* }
* }
* }
*/
public void testIntegerFieldWithByteSubfield() throws IOException {
boolean isByte = randomBoolean();
Integer number = isByte == true ? randomByte() : randomIntBetween(Byte.MAX_VALUE + 1, Integer.MAX_VALUE);
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean rootIgnoreMalformed = randomBoolean(); // root field ignore_malformed
boolean subFieldIgnoreMalformed = randomBoolean(); // sub-field ignore_malformed
String fieldName = "integer_field";
String subFieldName = "integer_field.byte_subfield";
String query = "SELECT " + fieldName + "," + subFieldName + " FROM test";
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> fieldProps = null;
if (rootIgnoreMalformed) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
fieldProp.put("ignore_malformed", true);
fieldProps.put(fieldName, fieldProp);
}
Map<String, Map<String, Object>> subFieldProps = null;
if (subFieldIgnoreMalformed) {
subFieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
fieldProp.put("ignore_malformed", true);
subFieldProps.put(subFieldName, fieldProp);
}
createIndexWithFieldTypeAndSubFields("integer", fieldProps, explicitSourceSetting ? indexProps : null, subFieldProps, "byte");
index("{\"" + fieldName + "\":" + number + "}");
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", fieldName, "integer", JDBCType.INTEGER, Integer.MAX_VALUE),
columnInfo("plain", subFieldName, "byte", JDBCType.TINYINT, Integer.MAX_VALUE)
));
if (explicitSourceSetting == false || enableSource == true) {
if (isByte == true || subFieldIgnoreMalformed == true) {
expected.put("rows", singletonList(Arrays.asList(number, isByte ? number : null)));
} else {
expected.put("rows", Collections.emptyList());
}
assertResponse(expected, runSql(query));
} else {
if (isByte == true || subFieldIgnoreMalformed == true) {
expectSourceDisabledError(query);
} else {
expected.put("rows", Collections.emptyList());
assertResponse(expected, runSql(query));
}
}
}
/*
* "byte_field": {
* "type": "byte",
* "ignore_malformed": true/false,
* "fields": {
* "integer_subfield": {
* "type": "integer",
* "ignore_malformed": true/false
* }
* }
* }
*/
public void testByteFieldWithIntegerSubfield() throws IOException {
boolean isByte = randomBoolean();
Integer number = isByte == true ? randomByte() : randomIntBetween(Byte.MAX_VALUE + 1, Integer.MAX_VALUE);
boolean explicitSourceSetting = randomBoolean(); // default (no _source setting) or explicit setting
boolean enableSource = randomBoolean(); // enable _source at index level
boolean rootIgnoreMalformed = randomBoolean(); // root field ignore_malformed
boolean subFieldIgnoreMalformed = randomBoolean(); // sub-field ignore_malformed
String fieldName = "byte_field";
String subFieldName = "byte_field.integer_subfield";
String query = "SELECT " + fieldName + "," + subFieldName + " FROM test";
Map<String, Object> indexProps = new HashMap<>(1);
indexProps.put("_source", enableSource);
Map<String, Map<String, Object>> fieldProps = null;
if (rootIgnoreMalformed) {
fieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
fieldProp.put("ignore_malformed", true);
fieldProps.put(fieldName, fieldProp);
}
Map<String, Map<String, Object>> subFieldProps = null;
if (subFieldIgnoreMalformed) {
subFieldProps = new HashMap<>(1);
Map<String, Object> fieldProp = new HashMap<>(1);
fieldProp.put("ignore_malformed", true);
subFieldProps.put(subFieldName, fieldProp);
}
createIndexWithFieldTypeAndSubFields("byte", fieldProps, explicitSourceSetting ? indexProps : null, subFieldProps, "integer");
index("{\"" + fieldName + "\":" + number + "}");
Map<String, Object> expected = new HashMap<>();
expected.put("columns", Arrays.asList(
columnInfo("plain", fieldName, "byte", JDBCType.TINYINT, Integer.MAX_VALUE),
columnInfo("plain", subFieldName, "integer", JDBCType.INTEGER, Integer.MAX_VALUE)
));
if (explicitSourceSetting == false || enableSource == true) {
if (isByte == true || rootIgnoreMalformed == true) {
expected.put("rows", singletonList(Arrays.asList(isByte ? number : null, number)));
} else {
expected.put("rows", Collections.emptyList());
}
assertResponse(expected, runSql(query));
} else {
if (isByte == true || rootIgnoreMalformed == true) {
expectSourceDisabledError(query);
} else {
expected.put("rows", Collections.emptyList());
assertResponse(expected, runSql(query));
}
}
}
private void expectSourceDisabledError(String query) {
expectBadRequest(() -> {
client().performRequest(buildRequest(query));
return Collections.emptyMap();
}, containsString("unable to fetch fields from _source field: _source is disabled in the mappings for index [test]"));
}
private void createIndexWithFieldTypeAndAlias(String type, Map<String, Map<String, Object>> fieldProps,
Map<String, Object> indexProps) throws IOException {
createIndexWithFieldTypeAndProperties(type, fieldProps, indexProps, true, false, null);
}
private void createIndexWithFieldTypeAndProperties(String type, Map<String, Map<String, Object>> fieldProps,
Map<String, Object> indexProps) throws IOException {
createIndexWithFieldTypeAndProperties(type, fieldProps, indexProps, false, false, null);
}
private void createIndexWithFieldTypeAndSubFields(String type, Map<String, Map<String, Object>> fieldProps,
Map<String, Object> indexProps, Map<String, Map<String, Object>> subFieldsProps,
String... subFieldsTypes) throws IOException {
createIndexWithFieldTypeAndProperties(type, fieldProps, indexProps, false, true, subFieldsProps, subFieldsTypes);
}
private void createIndexWithFieldTypeAndProperties(String type, Map<String, Map<String, Object>> fieldProps,
Map<String, Object> indexProps, boolean withAlias, boolean withSubFields, Map<String, Map<String, Object>> subFieldsProps,
String... subFieldsTypes) throws IOException {
Request request = new Request("PUT", "/test");
XContentBuilder index = JsonXContent.contentBuilder().prettyPrint().startObject();
index.startObject("mappings"); {
if (indexProps != null) {
for (Entry<String, Object> prop : indexProps.entrySet()) {
if (prop.getValue() instanceof Boolean) {
index.startObject(prop.getKey()); {
index.field("enabled", prop.getValue());
}
index.endObject();
}
}
}
index.startObject("properties"); {
String fieldName = type + "_field";
index.startObject(fieldName); {
index.field("type", type);
if (fieldProps != null && fieldProps.containsKey(fieldName)) {
for (Entry<String, Object> prop : fieldProps.get(fieldName).entrySet()) {
index.field(prop.getKey(), prop.getValue());
}
}
if (withSubFields) {
index.startObject("fields");
for (String subFieldType : subFieldsTypes) {
String subFieldName = subFieldType + "_subfield";
String fullSubFieldName = fieldName + "." + subFieldName;
index.startObject(subFieldName);
index.field("type", subFieldType);
if (subFieldsProps != null && subFieldsProps.containsKey(fullSubFieldName)) {
for (Entry<String, Object> prop : subFieldsProps.get(fullSubFieldName).entrySet()) {
index.field(prop.getKey(), prop.getValue());
}
}
index.endObject();
}
index.endObject();
}
}
index.endObject();
if (withAlias) {
// create two aliases - one within a hierarchy, the other just a simple field w/o hierarchy
index.startObject(fieldName + "_alias"); {
index.field("type", "alias");
index.field("path", fieldName);
}
index.endObject();
index.startObject("a.b.c." + fieldName + "_alias"); {
index.field("type", "alias");
index.field("path", fieldName);
}
index.endObject();
}
}
index.endObject();
}
index.endObject();
index.endObject();
request.setJsonEntity(Strings.toString(index));
client().performRequest(request);
}
private void index(String... docs) throws IOException {
Request request = new Request("POST", "/test/_bulk");
request.addParameter("refresh", "true");
StringBuilder bulk = new StringBuilder();
for (String doc : docs) {
bulk.append("{\"index\":{}\n");
bulk.append(doc + "\n");
}
request.setJsonEntity(bulk.toString());
client().performRequest(request);
}
private Request buildRequest(String query) {
Request request = new Request("POST", RestSqlTestCase.SQL_QUERY_REST_ENDPOINT);
request.addParameter("error_trace", "true");
request.addParameter("pretty", "true");
request.setEntity(new StringEntity("{\"query\":\"" + query + "\",\"mode\":\"plain\"}", ContentType.APPLICATION_JSON));
return request;
}
private Map<String, Object> runSql(String query) throws IOException {
Response response = client().performRequest(buildRequest(query));
try (InputStream content = response.getEntity().getContent()) {
return XContentHelper.convertToMap(JsonXContent.jsonXContent, content, false);
}
}
private JDBCType jdbcTypeFor(String esType) {
switch(esType) {
case "long":
return JDBCType.BIGINT;
case "integer":
return JDBCType.INTEGER;
case "short":
return JDBCType.SMALLINT;
case "byte":
return JDBCType.TINYINT;
case "float":
return JDBCType.REAL;
case "double":
return JDBCType.DOUBLE;
case "half_float":
return JDBCType.FLOAT;
case "scaled_float":
return JDBCType.DOUBLE;
default:
throw new AssertionError("Illegal value [" + esType + "] for data type");
}
}
}

View File

@ -128,7 +128,7 @@ public class ResultSetTestCase extends JdbcIntegrationTestCase {
Object text = results.getObject(2);
Object keyword = results.getObject(3);
assertEquals(-25, number);
assertEquals("xyz", text);
assertEquals("-25", text);
assertEquals("-25", keyword);
assertFalse(results.next());
});
@ -448,8 +448,8 @@ public class ResultSetTestCase extends JdbcIntegrationTestCase {
assertEquals("For field " + e.getKey(), Math.round(e.getValue().doubleValue()), results.getInt(e.getKey()));
assertEquals("For field " + e.getKey(), Math.round(e.getValue().doubleValue()), actual);
} else if (e.getValue() instanceof Float) {
assertEquals("For field " + e.getKey(), Math.round(e.getValue().floatValue()), results.getInt(e.getKey()));
assertEquals("For field " + e.getKey(), Math.round(e.getValue().floatValue()), actual);
assertEquals("For field " + e.getKey(), e.getValue(), Integer.valueOf(results.getInt(e.getKey())).floatValue());
assertEquals("For field " + e.getKey(), e.getValue(), Integer.valueOf(actual).floatValue());
} else {
assertEquals("For field " + e.getKey(), e.getValue().intValue(), results.getInt(e.getKey()));
assertEquals("For field " + e.getKey(), e.getValue().intValue(), actual);
@ -559,9 +559,9 @@ public class ResultSetTestCase extends JdbcIntegrationTestCase {
results.next();
for(Entry<String, Number> e : map.entrySet()) {
long actual = results.getObject(e.getKey(), Long.class);
if (e.getValue() instanceof Double || e.getValue() instanceof Float) {
assertEquals("For field " + e.getKey(), Math.round(e.getValue().doubleValue()), results.getLong(e.getKey()));
assertEquals("For field " + e.getKey(), Math.round(e.getValue().doubleValue()), actual);
if (e.getValue() instanceof Float) {
assertEquals("For field " + e.getKey(), e.getValue(), Long.valueOf(results.getLong(e.getKey())).floatValue());
assertEquals("For field " + e.getKey(), e.getValue(), Long.valueOf(actual).floatValue());
} else {
assertEquals("For field " + e.getKey(), e.getValue().longValue(), results.getLong(e.getKey()));
assertEquals("For field " + e.getKey(), e.getValue().longValue(), actual);
@ -660,10 +660,16 @@ public class ResultSetTestCase extends JdbcIntegrationTestCase {
doWithQuery(SELECT_WILDCARD, (results) -> {
results.next();
for(Entry<String, Number> e : map.entrySet()) {
if (e.getValue() instanceof Float) {
assertEquals("For field " + e.getKey(), e.getValue(), Double.valueOf(results.getDouble(e.getKey())).floatValue());
assertEquals("For field " + e.getKey(),
e.getValue(), Double.valueOf(results.getObject(e.getKey(), Double.class)).floatValue());
} else {
assertEquals("For field " + e.getKey(), e.getValue().doubleValue(), results.getDouble(e.getKey()), 0.0d);
assertEquals("For field " + e.getKey(),
e.getValue().doubleValue(), results.getObject(e.getKey(), Double.class), 0.0d);
}
}
});
}
@ -1475,8 +1481,8 @@ public class ResultSetTestCase extends JdbcIntegrationTestCase {
});
Integer[] values = randomArray(3, 15, s -> new Integer[s], () -> Integer.valueOf(randomInt(50)));
// add the minimal value in the middle yet the test will pick it up since the results are sorted
values[2] = Integer.valueOf(-10);
// add the known value as the first one in list. Parsing from _source the value will pick up the first value in the array.
values[0] = Integer.valueOf(-10);
String[] stringValues = new String[values.length];
for (int i = 0; i < values.length; i++) {
@ -1505,14 +1511,14 @@ public class ResultSetTestCase extends JdbcIntegrationTestCase {
});
Integer[] values = randomArray(3, 15, s -> new Integer[s], () -> Integer.valueOf(randomInt(50)));
// add the minimal value in the middle yet the test will pick it up since the results are sorted
values[2] = Integer.valueOf(-25);
// add the known value as the first one in list. Parsing from _source the value will pick up the first value in the array.
values[0] = Integer.valueOf(-25);
String[] stringValues = new String[values.length];
for (int i = 0; i < values.length; i++) {
stringValues[i] = String.valueOf(values[i]);
}
stringValues[0] = "xyz";
stringValues[1] = "xyz";
index("test", "1", builder -> {
builder.startArray("object");

View File

@ -351,7 +351,7 @@ public abstract class RestSqlTestCase extends ESRestTestCase implements ErrorsTe
}, containsString("unknown field [columnar], parser not found"));
}
protected void expectBadRequest(CheckedSupplier<Map<String, Object>, Exception> code, Matcher<String> errorMessageMatcher) {
public static void expectBadRequest(CheckedSupplier<Map<String, Object>, Exception> code, Matcher<String> errorMessageMatcher) {
try {
Map<String, Object> result = code.get();
fail("expected ResponseException but got " + result);
@ -861,7 +861,7 @@ public abstract class RestSqlTestCase extends ESRestTestCase implements ErrorsTe
);
}
private void assertResponse(Map<String, Object> expected, Map<String, Object> actual) {
public static void assertResponse(Map<String, Object> expected, Map<String, Object> actual) {
if (false == expected.equals(actual)) {
NotEqualMessageBuilder message = new NotEqualMessageBuilder();
message.compareMaps(actual, expected);

View File

@ -33,6 +33,7 @@ import org.elasticsearch.xpack.sql.type.KeywordEsField;
import org.elasticsearch.xpack.sql.type.TextEsField;
import org.elasticsearch.xpack.sql.type.UnsupportedEsField;
import org.elasticsearch.xpack.sql.util.CollectionUtils;
import org.elasticsearch.xpack.sql.util.Holder;
import java.util.ArrayList;
import java.util.Arrays;
@ -365,8 +366,8 @@ public class IndexResolver {
// lack of parent implies the field is an alias
if (map == null) {
// as such, create the field manually
fieldFunction = s -> createField(s, DataType.OBJECT.name(), new TreeMap<>(), false);
// as such, create the field manually, marking the field to also be an alias
fieldFunction = s -> createField(s, DataType.OBJECT.name(), new TreeMap<>(), false, true);
} else {
Iterator<FieldCapabilities> iterator = map.values().iterator();
FieldCapabilities parentCap = iterator.next();
@ -374,7 +375,7 @@ public class IndexResolver {
parentCap = iterator.next();
}
final FieldCapabilities parentC = parentCap;
fieldFunction = s -> createField(s, parentC.getType(), new TreeMap<>(), parentC.isAggregatable());
fieldFunction = s -> createField(s, parentC.getType(), new TreeMap<>(), parentC.isAggregatable(), false);
}
parent = createField(parentName, globalCaps, hierarchicalMapping, flattedMapping, fieldFunction);
@ -390,22 +391,23 @@ public class IndexResolver {
return esField;
}
private static EsField createField(String fieldName, String typeName, Map<String, EsField> props, boolean isAggregateable) {
private static EsField createField(String fieldName, String typeName, Map<String, EsField> props,
boolean isAggregateable, boolean isAlias) {
DataType esType = DataType.fromTypeName(typeName);
switch (esType) {
case TEXT:
return new TextEsField(fieldName, props, false);
return new TextEsField(fieldName, props, false, isAlias);
case KEYWORD:
int length = DataType.KEYWORD.defaultPrecision;
// TODO: to check whether isSearchable/isAggregateable takes into account the presence of the normalizer
boolean normalized = false;
return new KeywordEsField(fieldName, props, isAggregateable, length, normalized);
return new KeywordEsField(fieldName, props, isAggregateable, length, normalized, isAlias);
case DATETIME:
return new DateEsField(fieldName, props, isAggregateable);
case UNSUPPORTED:
return new UnsupportedEsField(fieldName, typeName);
default:
return new EsField(fieldName, esType, props, isAggregateable);
return new EsField(fieldName, esType, props, isAggregateable, isAlias);
}
}
@ -520,9 +522,26 @@ public class IndexResolver {
}
EsField field = indexFields.flattedMapping.get(fieldName);
if (field == null || (invalidField != null && (field instanceof InvalidMappedField) == false)) {
int dot = fieldName.lastIndexOf('.');
/*
* Looking up the "tree" at the parent fields here to see if the field is an alias.
* When the upper elements of the "tree" have no elements in fieldcaps, then this is an alias field. But not
* always: if there are two aliases - a.b.c.alias1 and a.b.c.alias2 - only one of them will be considered an alias.
*/
Holder<Boolean> isAlias = new Holder<>(false);
if (dot >= 0) {
String parentName = fieldName.substring(0, dot);
if (indexFields.flattedMapping.get(parentName) == null) {
// lack of parent implies the field is an alias
if (fieldCaps.get(parentName) == null) {
isAlias.set(true);
}
}
}
createField(fieldName, fieldCaps, indexFields.hierarchicalMapping, indexFields.flattedMapping,
s -> invalidField != null ? invalidField : createField(s, typeCap.getType(), emptyMap(),
typeCap.isAggregatable()));
typeCap.isAggregatable(), isAlias.get()));
}
}
}

View File

@ -474,7 +474,7 @@ public class Querier {
private HitExtractor createExtractor(FieldExtraction ref) {
if (ref instanceof SearchHitFieldRef) {
SearchHitFieldRef f = (SearchHitFieldRef) ref;
return new FieldHitExtractor(f.name(), f.getDataType(), cfg.zoneId(),
return new FieldHitExtractor(f.name(), f.fullFieldName(), f.getDataType(), cfg.zoneId(),
f.useDocValue(), f.hitName(), multiValueFieldLeniency);
}

View File

@ -6,6 +6,7 @@
package org.elasticsearch.xpack.sql.execution.search.extractor;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.document.DocumentField;
@ -13,6 +14,7 @@ import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.GeoUtils;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.index.mapper.IgnoredFieldMapper;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.xpack.sql.SqlIllegalArgumentException;
import org.elasticsearch.xpack.sql.expression.function.scalar.geo.GeoShape;
@ -34,6 +36,7 @@ import java.util.StringJoiner;
*/
public class FieldHitExtractor implements HitExtractor {
private static final Version SWITCHED_FROM_DOCVALUES_TO_SOURCE_EXTRACTION = Version.V_7_4_0;
/**
* Stands for {@code field}. We try to use short names for {@link HitExtractor}s
* to save a few bytes when we send them back to the user.
@ -49,6 +52,7 @@ public class FieldHitExtractor implements HitExtractor {
}
private final String fieldName, hitName;
private final String fullFieldName; // used to look at the _ignored section of the query response for the actual full field name
private final DataType dataType;
private final ZoneId zoneId;
private final boolean useDocValue;
@ -56,15 +60,17 @@ public class FieldHitExtractor implements HitExtractor {
private final String[] path;
public FieldHitExtractor(String name, DataType dataType, ZoneId zoneId, boolean useDocValue) {
this(name, dataType, zoneId, useDocValue, null, false);
this(name, null, dataType, zoneId, useDocValue, null, false);
}
public FieldHitExtractor(String name, DataType dataType, ZoneId zoneId, boolean useDocValue, boolean arrayLeniency) {
this(name, dataType, zoneId, useDocValue, null, arrayLeniency);
this(name, null, dataType, zoneId, useDocValue, null, arrayLeniency);
}
public FieldHitExtractor(String name, DataType dataType, ZoneId zoneId, boolean useDocValue, String hitName, boolean arrayLeniency) {
public FieldHitExtractor(String name, String fullFieldName, DataType dataType, ZoneId zoneId, boolean useDocValue, String hitName,
boolean arrayLeniency) {
this.fieldName = name;
this.fullFieldName = fullFieldName;
this.dataType = dataType;
this.zoneId = zoneId;
this.useDocValue = useDocValue;
@ -82,6 +88,11 @@ public class FieldHitExtractor implements HitExtractor {
FieldHitExtractor(StreamInput in) throws IOException {
fieldName = in.readString();
if (in.getVersion().onOrAfter(SWITCHED_FROM_DOCVALUES_TO_SOURCE_EXTRACTION)) {
fullFieldName = in.readOptionalString();
} else {
fullFieldName = null;
}
String esType = in.readOptionalString();
dataType = esType != null ? DataType.fromTypeName(esType) : null;
zoneId = ZoneId.of(in.readString());
@ -99,6 +110,9 @@ public class FieldHitExtractor implements HitExtractor {
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(fieldName);
if (out.getVersion().onOrAfter(SWITCHED_FROM_DOCVALUES_TO_SOURCE_EXTRACTION)) {
out.writeOptionalString(fullFieldName);
}
out.writeOptionalString(dataType == null ? null : dataType.typeName);
out.writeString(zoneId.getId());
out.writeBoolean(useDocValue);
@ -115,6 +129,24 @@ public class FieldHitExtractor implements HitExtractor {
value = unwrapMultiValue(field.getValues());
}
} else {
// if the field was ignored because it was malformed and ignore_malformed was turned on
if (fullFieldName != null
&& hit.getFields().containsKey(IgnoredFieldMapper.NAME)
&& dataType.isFromDocValuesOnly() == false
&& dataType.isNumeric()) {
/*
* ignore_malformed makes sense for extraction from _source for numeric fields only.
* And we check here that the data type is actually a numeric one to rule out
* any non-numeric sub-fields (for which the "parent" field should actually be extracted from _source).
* For example, in the case of a malformed number, a "byte" field with "ignore_malformed: true"
* with a "text" sub-field should return "null" for the "byte" parent field and the actual malformed
* data for the "text" sub-field. Also, the _ignored section of the response contains the full field
* name, thus the need to do the comparison with that and not only the field name.
*/
if (hit.getFields().get(IgnoredFieldMapper.NAME).getValues().contains(fullFieldName)) {
return null;
}
}
Map<String, Object> source = hit.getSourceAsMap();
if (source != null) {
value = extractFromSource(source);
@ -165,13 +197,39 @@ public class FieldHitExtractor implements HitExtractor {
return DateUtils.asDateTime(Long.parseLong(values.toString()), zoneId);
}
}
// For numerics, the Jackson json parser can generate Integers, Longs, BigIntegers (if Long is not enough)
// and BigDecimals (if Double is not enough)
if (values instanceof Number
|| values instanceof String
|| values instanceof Boolean) {
if (values instanceof Number || values instanceof String || values instanceof Boolean) {
if (dataType == null) {
return values;
}
if (dataType.isNumeric() && dataType.isFromDocValuesOnly() == false) {
if (dataType == DataType.DOUBLE || dataType == DataType.FLOAT || dataType == DataType.HALF_FLOAT) {
Number result = null;
try {
result = dataType.numberType().parse(values, true);
} catch(IllegalArgumentException iae) {
return null;
}
// docvalue_fields always returns a Double value even if the underlying floating point data type is not Double;
// even though we don't extract from docvalue_fields anymore, the behavior should stay consistent
return result.doubleValue();
} else {
Number result = null;
try {
result = dataType.numberType().parse(values, true);
} catch(IllegalArgumentException iae) {
return null;
}
return result;
}
} else if (dataType.isString()) {
return values.toString();
} else {
return values;
}
}
throw new SqlIllegalArgumentException("Type {} (returned by [{}]) is not supported", values.getClass().getSimpleName(), fieldName);
}
@ -260,6 +318,10 @@ public class FieldHitExtractor implements HitExtractor {
return fieldName;
}
public String fullFieldName() {
return fullFieldName;
}
public ZoneId zoneId() {
return zoneId;
}

View File

@ -31,6 +31,7 @@ import org.elasticsearch.xpack.sql.querydsl.query.MatchAll;
import org.elasticsearch.xpack.sql.querydsl.query.NestedQuery;
import org.elasticsearch.xpack.sql.querydsl.query.Query;
import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.type.DataType;
import java.io.IOException;
import java.util.AbstractMap;
@ -297,16 +298,48 @@ public class QueryContainer {
// reference methods
//
private FieldExtraction topHitFieldRef(FieldAttribute fieldAttr) {
return new SearchHitFieldRef(aliasName(fieldAttr), fieldAttr.field().getDataType(), fieldAttr.field().isAggregatable());
FieldAttribute actualField = fieldAttr;
FieldAttribute rootField = fieldAttr;
StringBuilder fullFieldName = new StringBuilder(fieldAttr.field().getName());
// Only if the field is not an alias (in which case it will be taken from docvalue_fields if it is aggregatable),
// go up the tree of parents until a non-object (and non-nested) field is found and use that specific parent
// as the field to extract data from _source. We do it like this because sub-fields are not present in _source; only
// the root field to which those sub-fields belong is. Instead of "text_field.keyword_subfield" for _source extraction,
// we use "text_field", because there is no source for "keyword_subfield".
/*
* "text_field": {
* "type": "text",
* "fields": {
* "keyword_subfield": {
* "type": "keyword"
* }
* }
* }
*/
if (fieldAttr.field().isAlias() == false) {
while (actualField.parent() != null
&& actualField.parent().field().getDataType() != DataType.OBJECT
&& actualField.parent().field().getDataType() != DataType.NESTED
&& actualField.field().getDataType().isFromDocValuesOnly() == false) {
actualField = actualField.parent();
}
}
while (rootField.parent() != null) {
fullFieldName.insert(0, ".").insert(0, rootField.parent().field().getName());
rootField = rootField.parent();
}
return new SearchHitFieldRef(aliasName(actualField), fullFieldName.toString(), fieldAttr.field().getDataType(),
fieldAttr.field().isAggregatable(), fieldAttr.field().isAlias());
}
private Tuple<QueryContainer, FieldExtraction> nestedHitFieldRef(FieldAttribute attr) {
String name = aliasName(attr);
Query q = rewriteToContainNestedField(query, attr.source(),
attr.nestedParent().name(), name, attr.field().getDataType().format(), attr.field().isAggregatable());
attr.nestedParent().name(), name, attr.field().getDataType().format(), attr.field().getDataType().isFromDocValuesOnly());
SearchHitFieldRef nestedFieldRef = new SearchHitFieldRef(name, attr.field().getDataType(),
attr.field().isAggregatable(), attr.parent().name());
SearchHitFieldRef nestedFieldRef = new SearchHitFieldRef(name, null, attr.field().getDataType(), attr.field().isAggregatable(),
false, attr.parent().name());
return new Tuple<>(
new QueryContainer(q, aggs, fields, aliases, pseudoFunctions, scalarFunctions, sort, limit, trackHits, includeFrozen),

View File

@ -10,18 +10,23 @@ import org.elasticsearch.xpack.sql.type.DataType;
public class SearchHitFieldRef extends FieldReference {
private final String name;
private final String fullFieldName; // path included. If the field's full path is a.b.c, the full field name is "a.b.c" and the name is "c"
private final DataType dataType;
private final boolean docValue;
private final String hitName;
public SearchHitFieldRef(String name, DataType dataType, boolean useDocValueInsteadOfSource) {
this(name, dataType, useDocValueInsteadOfSource, null);
public SearchHitFieldRef(String name, String fullFieldName, DataType dataType, boolean useDocValueInsteadOfSource, boolean isAlias) {
this(name, fullFieldName, dataType, useDocValueInsteadOfSource, isAlias, null);
}
public SearchHitFieldRef(String name, DataType dataType, boolean useDocValueInsteadOfSource, String hitName) {
public SearchHitFieldRef(String name, String fullFieldName, DataType dataType, boolean useDocValueInsteadOfSource, boolean isAlias,
String hitName) {
this.name = name;
this.fullFieldName = fullFieldName;
this.dataType = dataType;
this.docValue = useDocValueInsteadOfSource;
// these field types can only be extracted from docvalue_fields (ie, values already computed by Elasticsearch)
// because, for us to be able to extract them from _source, we would need the mapping of those fields (which we don't have)
this.docValue = isAlias ? useDocValueInsteadOfSource : (dataType.isFromDocValuesOnly() ? useDocValueInsteadOfSource : false);
this.hitName = hitName;
}
@ -34,6 +39,10 @@ public class SearchHitFieldRef extends FieldReference {
return name;
}
public String fullFieldName() {
return fullFieldName;
}
public DataType getDataType() {
return dataType;
}

View File

@ -5,6 +5,7 @@
*/
package org.elasticsearch.xpack.sql.type;
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
import org.elasticsearch.xpack.sql.util.DateUtils;
import java.sql.JDBCType;
@@ -18,7 +19,7 @@ import java.util.Map.Entry;
/**
* Elasticsearch SQL data types.
* This class also implements JDBC {@link SQLType} for properly receiving and setting values.
* Where possible, please use the build-in, JDBC {@link Types} and {@link JDBCType} to avoid coupling
* Where possible, please use the built-in, JDBC {@link Types} and {@link JDBCType} to avoid coupling
* to the API.
*/
public enum DataType {
@@ -143,7 +144,6 @@ public enum DataType {
SQL_TO_ES.put(entry.getKey().substring(4), entry.getValue());
}
// special ones
SQL_TO_ES.put("BOOL", DataType.BOOLEAN);
SQL_TO_ES.put("INT", DataType.INTEGER);
@@ -181,7 +181,6 @@
*/
public final int defaultPrecision;
/**
* Display Size
* <p>
@@ -270,6 +269,16 @@
return isDateBased() || isTimeBased();
}
// whether this data type can only be extracted from docvalue_fields (as opposed to being parsed from _source)
public boolean isFromDocValuesOnly() {
return this == KEYWORD // because of ignore_above. Extracting this from _source wouldn't make sense if it wasn't indexed at all.
|| this == DATE // because of date formats
|| this == DATETIME
|| this == SCALED_FLOAT // because of scaling_factor
|| this == GEO_POINT
|| this == GEO_SHAPE;
}
public static DataType fromOdbcType(String odbcType) {
return ODBC_TO_ES.get(odbcType);
}
@@ -296,4 +305,11 @@
public String format() {
return isDateOrTimeBased() ? DateUtils.DATE_PARSE_FORMAT : null;
}
/**
* Returns the appropriate NumberType enum corresponding to this es type
*/
public NumberType numberType() {
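// relies on the SQL type's esType name matching a NumberFieldMapper.NumberType constant (e.g. "long" -> LONG)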
return NumberType.valueOf(esType.toUpperCase(Locale.ROOT));
}
}
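For context only (not part of this commit): a minimal sketch of how the two new helpers are meant to work together when a column value is materialized from a search hit. The extractColumn name is hypothetical, and the sketch assumes the existing DataType.isNumeric() accessor and NumberFieldMapper.NumberType#parse(Object, boolean) as exposed by the codebase this change targets.

// Hypothetical fragment living next to the SQL hit extractors, for illustration only.
static Object extractColumn(DataType type, Object docValueFieldValue, Object sourceValue) {
    if (type.isFromDocValuesOnly()) {
        // keyword, date/datetime, scaled_float and the geo types keep coming from docvalue_fields,
        // i.e. values Elasticsearch has already computed, since _source alone lacks the mapping details
        return docValueFieldValue;
    }
    if (type.isNumeric() && sourceValue != null) {
        // numbers found in _source are re-parsed the way the index-time mapper would parse them,
        // so "42", 42 and 42.0 all coerce to the column's declared numeric type
        return type.numberType().parse(sourceValue, true);
    }
    // everything else is returned as parsed from _source
    return sourceValue;
}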

View File

@@ -19,12 +19,18 @@ public class EsField {
private final boolean aggregatable;
private final Map<String, EsField> properties;
private final String name;
private final boolean isAlias;
public EsField(String name, DataType esDataType, Map<String, EsField> properties, boolean aggregatable) {
this(name, esDataType, properties, aggregatable, false);
}
public EsField(String name, DataType esDataType, Map<String, EsField> properties, boolean aggregatable, boolean isAlias) {
this.name = name;
this.esDataType = esDataType;
this.aggregatable = aggregatable;
this.properties = properties;
this.isAlias = isAlias;
}
/**
@@ -57,6 +63,13 @@
return properties;
}
/**
* This field is an alias to another field
*/
public boolean isAlias() {
return isAlias;
}
/**
* Returns the path to the keyword version of this field if this field is text and has a subfield indexed as keyword;
* returns the field name itself in all other cases, and throws an exception if a text field has no such keyword subfield.
@@ -103,14 +116,14 @@
return false;
}
EsField field = (EsField) o;
return aggregatable == field.aggregatable && esDataType == field.esDataType
return aggregatable == field.aggregatable && isAlias == field.isAlias && esDataType == field.esDataType
&& Objects.equals(name, field.name)
&& Objects.equals(properties, field.properties);
}
@Override
public int hashCode() {
return Objects.hash(esDataType, aggregatable, properties, name);
return Objects.hash(esDataType, aggregatable, properties, name, isAlias);
}
public static final class Exact {

View File

@@ -22,7 +22,12 @@ public class KeywordEsField extends EsField {
}
public KeywordEsField(String name, Map<String, EsField> properties, boolean hasDocValues, int precision, boolean normalized) {
super(name, DataType.KEYWORD, properties, hasDocValues);
this(name, properties, hasDocValues, precision, normalized, false);
}
public KeywordEsField(String name, Map<String, EsField> properties, boolean hasDocValues, int precision,
boolean normalized, boolean isAlias) {
super(name, DataType.KEYWORD, properties, hasDocValues, isAlias);
this.precision = precision;
this.normalized = normalized;
}

View File

@@ -17,7 +17,11 @@ import java.util.function.Function;
public class TextEsField extends EsField {
public TextEsField(String name, Map<String, EsField> properties, boolean hasDocValues) {
super(name, DataType.TEXT, properties, hasDocValues);
this(name, properties, hasDocValues, false);
}
public TextEsField(String name, Map<String, EsField> properties, boolean hasDocValues, boolean isAlias) {
super(name, DataType.TEXT, properties, hasDocValues, isAlias);
}
@Override

View File

@@ -20,22 +20,22 @@ public class SqlTranslateActionIT extends AbstractSqlIntegTestCase {
public void testSqlTranslateAction() {
assertAcked(client().admin().indices().prepareCreate("test").get());
client().prepareBulk()
.add(new IndexRequest("test").id("1").source("data", "bar", "count", 42))
.add(new IndexRequest("test").id("2").source("data", "baz", "count", 43))
.add(new IndexRequest("test").id("1").source("data", "bar", "count", 42, "date", "1984-01-04"))
.add(new IndexRequest("test").id("2").source("data", "baz", "count", 43, "date", "1989-12-19"))
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
.get();
ensureYellow("test");
boolean columnOrder = randomBoolean();
String columns = columnOrder ? "data, count" : "count, data";
String columns = columnOrder ? "data, count, date" : "date, data, count";
SqlTranslateResponse response = new SqlTranslateRequestBuilder(client(), SqlTranslateAction.INSTANCE)
.query("SELECT " + columns + " FROM test ORDER BY count").get();
SearchSourceBuilder source = response.source();
FetchSourceContext fetch = source.fetchSource();
assertTrue(fetch.fetchSource());
assertArrayEquals(new String[] { "data" }, fetch.includes());
assertArrayEquals(new String[] { "data", "count" }, fetch.includes());
assertEquals(
singletonList(new DocValueFieldsContext.FieldAndFormat("count", null)),
singletonList(new DocValueFieldsContext.FieldAndFormat("date", "epoch_millis")),
source.docValueFields());
assertEquals(singletonList(SortBuilders.fieldSort("count").missing("_last").unmappedType("long")), source.sorts());
}

View File

@@ -20,6 +20,7 @@ import org.elasticsearch.xpack.sql.expression.gen.processor.ChainingProcessor;
import org.elasticsearch.xpack.sql.expression.gen.processor.ChainingProcessorTests;
import org.elasticsearch.xpack.sql.expression.gen.processor.HitExtractorProcessor;
import org.elasticsearch.xpack.sql.expression.gen.processor.Processor;
import org.elasticsearch.xpack.sql.type.DataType;
import java.io.IOException;
import java.util.ArrayList;
@@ -71,7 +72,7 @@ public class ComputingExtractorTests extends AbstractWireSerializingTestCase<Com
public void testGet() {
String fieldName = randomAlphaOfLength(5);
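// the extractor is now given a concrete data type (DOUBLE) rather than null; with _source extraction the type drives how the raw value is parsed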
ChainingProcessor extractor = new ChainingProcessor(
new HitExtractorProcessor(new FieldHitExtractor(fieldName, null, UTC, true, false)),
new HitExtractorProcessor(new FieldHitExtractor(fieldName, DataType.DOUBLE, UTC, true, false)),
new MathProcessor(MathOperation.LOG));
int times = between(1, 1000);

View File

@@ -42,7 +42,7 @@ public class FieldHitExtractorTests extends AbstractWireSerializingTestCase<Fiel
public static FieldHitExtractor randomFieldHitExtractor() {
String hitName = randomAlphaOfLength(5);
String name = randomAlphaOfLength(5) + "." + hitName;
return new FieldHitExtractor(name, null, randomZone(), randomBoolean(), hitName, false);
return new FieldHitExtractor(name, null, null, randomZone(), randomBoolean(), hitName, false);
}
@Override
@@ -59,6 +59,7 @@ public class FieldHitExtractorTests extends AbstractWireSerializingTestCase<Fiel
protected FieldHitExtractor mutateInstance(FieldHitExtractor instance) {
return new FieldHitExtractor(
instance.fieldName() + "mutated",
instance.fullFieldName() + "mutated",
randomValueOtherThan(instance.dataType(), () -> randomFrom(DataType.values())),
randomValueOtherThan(instance.zoneId(), ESTestCase::randomZone),
randomBoolean(),
@@ -188,7 +189,7 @@ public class FieldHitExtractorTests extends AbstractWireSerializingTestCase<Fiel
public void testToString() {
assertEquals("hit.field@hit@Europe/Berlin",
new FieldHitExtractor("hit.field", null, ZoneId.of("Europe/Berlin"), true, "hit", false).toString());
new FieldHitExtractor("hit.field", null, null, ZoneId.of("Europe/Berlin"), true, "hit", false).toString());
}
public void testMultiValuedDocValue() {
@@ -291,7 +292,7 @@ public class FieldHitExtractorTests extends AbstractWireSerializingTestCase<Fiel
}
@SuppressWarnings({ "rawtypes", "unchecked" })
public void testNestedFieldsWithDotsAndRandomHiearachy() {
public void testNestedFieldsWithDotsAndRandomHierarchy() {
String[] path = new String[100];
StringJoiner sj = new StringJoiner(".");
for (int i = 0; i < 100; i++) {

View File

@@ -24,10 +24,9 @@
size: 1000
_source:
includes:
- int
- str
excludes: []
docvalue_fields:
- field: int
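# the int column is now listed under the _source includes; no docvalue_fields entry is requested for it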
sort:
- int:
order: asc