[ML] Fix array out-of-bounds error in IDGenerator and adjust format for mapping (#41703) (#41717)

* [ML] Fix array out-of-bounds error in IDGenerator and adjust format for mapping

* Update DataFramePivotRestIT.java
This commit is contained in:
Benjamin Trent 2019-05-02 11:09:42 -05:00 committed by GitHub
parent b4bcbf9f64
commit a70f796edd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 44 additions and 12 deletions

View File

@ -214,9 +214,9 @@ public class DataFramePivotRestIT extends DataFrameRestTestCase {
config += " \"pivot\": {"
+ " \"group_by\": {"
+ " \"by_day\": {"
+ " \"by_hr\": {"
+ " \"date_histogram\": {"
+ " \"interval\": \"1d\",\"field\":\"timestamp\",\"format\":\"yyyy-MM-DD\""
+ " \"interval\": \"1h\",\"field\":\"timestamp\",\"format\":\"yyyy-MM-DD_HH\""
+ " } } },"
+ " \"aggregations\": {"
+ " \"avg_rating\": {"
@ -232,10 +232,9 @@ public class DataFramePivotRestIT extends DataFrameRestTestCase {
startAndWaitForTransform(transformId, dataFrameIndex, BASIC_AUTH_VALUE_DATA_FRAME_ADMIN_WITH_SOME_DATA_ACCESS);
assertTrue(indexExists(dataFrameIndex));
// we expect 21 documents as there shall be 21 days worth of docs
Map<String, Object> indexStats = getAsMap(dataFrameIndex + "/_stats");
assertEquals(21, XContentMapValues.extractValue("_all.total.docs.count", indexStats));
assertOnePivotValue(dataFrameIndex + "/_search?q=by_day:2017-01-15", 3.82);
assertEquals(104, XContentMapValues.extractValue("_all.total.docs.count", indexStats));
assertOnePivotValue(dataFrameIndex + "/_search?q=by_hr:1484499600000", 4.0833333333);
}
@SuppressWarnings("unchecked")

View File

@ -89,14 +89,20 @@ public abstract class DataFrameRestTestCase extends ESRestTestCase {
// create index
final StringBuilder bulk = new StringBuilder();
int day = 10;
int hour = 10;
int min = 10;
for (int i = 0; i < numDocs; i++) {
bulk.append("{\"index\":{\"_index\":\"" + REVIEWS_INDEX_NAME + "\"}}\n");
long user = Math.round(Math.pow(i * 31 % 1000, distributionTable[i % distributionTable.length]) % 27);
int stars = distributionTable[(i * 33) % distributionTable.length];
long business = Math.round(Math.pow(user * stars, distributionTable[i % distributionTable.length]) % 13);
int hour = randomIntBetween(10, 20);
int min = randomIntBetween(30, 59);
int sec = randomIntBetween(30, 59);
if (i % 12 == 0) {
hour = 10 + (i % 13);
}
if (i % 5 == 0) {
min = 10 + (i % 49);
}
int sec = 10 + (i % 49);
String date_string = "2017-01-" + day + "T" + hour + ":" + min + ":" + sec + "Z";
bulk.append("{\"user_id\":\"")

View File

@ -18,6 +18,8 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.xpack.core.dataframe.DataFrameField;
import org.elasticsearch.xpack.core.dataframe.DataFrameMessages;
import org.elasticsearch.xpack.core.dataframe.transforms.DataFrameTransformConfig;
import org.elasticsearch.xpack.core.dataframe.transforms.pivot.DateHistogramGroupSource;
import org.elasticsearch.xpack.core.dataframe.transforms.pivot.SingleGroupSource;
import java.io.IOException;
import java.util.Map;
@ -31,7 +33,9 @@ public final class DataframeIndex {
public static final String DOC_TYPE = "_doc";
private static final String PROPERTIES = "properties";
private static final String TYPE = "type";
private static final String FORMAT = "format";
private static final String META = "_meta";
private static final String DEFAULT_TIME_FORMAT = "strict_date_optional_time||epoch_millis";
private DataframeIndex() {
}
@ -44,7 +48,9 @@ public final class DataframeIndex {
request.settings(Settings.builder() // <1>
.put("index.number_of_shards", 1).put("index.number_of_replicas", 0));
request.mapping(DOC_TYPE, createMappingXContent(mappings, transformConfig.getId()));
request.mapping(DOC_TYPE, createMappingXContent(mappings,
transformConfig.getPivotConfig().getGroupConfig().getGroups(),
transformConfig.getId()));
client.execute(CreateIndexAction.INSTANCE, request, ActionListener.wrap(createIndexResponse -> {
listener.onResponse(true);
@ -56,14 +62,29 @@ public final class DataframeIndex {
}));
}
private static XContentBuilder createMappingXContent(Map<String, String> mappings, String id) {
private static XContentBuilder createMappingXContent(Map<String, String> mappings,
Map<String, SingleGroupSource> groupSources,
String id) {
try {
XContentBuilder builder = jsonBuilder().startObject();
builder.startObject(DOC_TYPE);
addMetaData(builder, id);
builder.startObject(PROPERTIES);
for (Entry<String, String> field : mappings.entrySet()) {
builder.startObject(field.getKey()).field(TYPE, field.getValue()).endObject();
String fieldName = field.getKey();
String fieldType = field.getValue();
builder.startObject(fieldName);
builder.field(TYPE, fieldType);
SingleGroupSource groupSource = groupSources.get(fieldName);
if (groupSource instanceof DateHistogramGroupSource) {
String format = ((DateHistogramGroupSource) groupSource).getFormat();
if (format != null) {
builder.field(FORMAT, DEFAULT_TIME_FORMAT + "||" + format);
}
}
builder.endObject();
}
builder.endObject(); // properties
builder.endObject(); // doc_type

View File

@ -21,6 +21,7 @@ import java.util.TreeMap;
*/
public final class IDGenerator {
private static final byte[] NULL_VALUE = "__NULL_VALUE__".getBytes(StandardCharsets.UTF_8);
private static final byte[] EMPTY_VALUE = "__EMPTY_VALUE__".getBytes(StandardCharsets.UTF_8);
private static final byte DELIM = '$';
private static final long SEED = 19;
private static final int MAX_FIRST_BYTES = 5;
@ -57,7 +58,9 @@ public final class IDGenerator {
for (Object value : objectsForIDGeneration.values()) {
byte[] v = getBytes(value);
if (v.length == 0) {
v = EMPTY_VALUE;
}
buffer.append(v, 0, v.length);
buffer.append(DELIM);

View File

@ -27,6 +27,9 @@ public class IDGeneratorTests extends ESTestCase {
assertNotEquals(id, idGen.getID());
idGen.add("key6", 13);
assertNotEquals(id, idGen.getID());
id = idGen.getID();
idGen.add("key7", "");
assertNotEquals(id, idGen.getID());
}
public void testOrderIndependence() {