Integrate domainSplit function into datafeeds (elastic/elasticsearch#841)
If `domainSplit(` is detected in an inline script, the function and params are injected into the script. The majority of this PR is actually test-related. Adds a unit test to check for the injected script/params. Also adds another QA test which -- through a very round-about mechanism -- confirms that the injected script compiles and functions correctly. The QA test can be simplified greatly once the Preview API is added. Original commit: elastic/x-pack-elasticsearch@c7c35a982c
This commit is contained in:
parent
ec902c4dc3
commit
a11ddd1e04
|
@ -18,10 +18,12 @@ import org.elasticsearch.common.xcontent.XContentParser;
|
|||
import org.elasticsearch.index.query.QueryBuilder;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.elasticsearch.index.query.QueryParseContext;
|
||||
import org.elasticsearch.script.Script;
|
||||
import org.elasticsearch.search.aggregations.AggregatorFactories;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.xpack.ml.job.config.Job;
|
||||
import org.elasticsearch.xpack.ml.job.messages.Messages;
|
||||
import org.elasticsearch.xpack.ml.utils.DomainSplitFunction;
|
||||
import org.elasticsearch.xpack.ml.utils.ExceptionsHelper;
|
||||
import org.elasticsearch.xpack.ml.utils.MlStrings;
|
||||
|
||||
|
@ -29,7 +31,9 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
|
@ -401,7 +405,27 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
|
|||
}
|
||||
|
||||
public void setScriptFields(List<SearchSourceBuilder.ScriptField> scriptFields) {
|
||||
List<SearchSourceBuilder.ScriptField> sorted = new ArrayList<>(scriptFields);
|
||||
List<SearchSourceBuilder.ScriptField> sorted = new ArrayList<>();
|
||||
for (SearchSourceBuilder.ScriptField scriptField : scriptFields) {
|
||||
String script = scriptField.script().getIdOrCode();
|
||||
|
||||
if (script.contains("domainSplit(")) {
|
||||
String modifiedCode = DomainSplitFunction.function + "\n" + script;
|
||||
Map<String, Object> modifiedParams = new HashMap<>(scriptField.script().getParams().size()
|
||||
+ DomainSplitFunction.params.size());
|
||||
|
||||
modifiedParams.putAll(scriptField.script().getParams());
|
||||
modifiedParams.putAll(DomainSplitFunction.params);
|
||||
|
||||
Script newScript = new Script(scriptField.script().getType(), scriptField.script().getLang(),
|
||||
modifiedCode, modifiedParams);
|
||||
|
||||
sorted.add(new SearchSourceBuilder.ScriptField(scriptField.fieldName(), newScript, scriptField.ignoreFailure()));
|
||||
} else {
|
||||
sorted.add(scriptField);
|
||||
}
|
||||
|
||||
}
|
||||
sorted.sort(Comparator.comparing(SearchSourceBuilder.ScriptField::fieldName));
|
||||
this.scriptFields = sorted;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,277 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.utils;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.ResourceBundle;
|
||||
|
||||
public final class DomainSplitFunction {
|
||||
|
||||
public static final String function;
|
||||
public static final Map<String, Object> params;
|
||||
|
||||
DomainSplitFunction() {}
|
||||
|
||||
static {
|
||||
Map<String, Object> paramsMap = new HashMap<>();
|
||||
|
||||
ResourceBundle resource = ResourceBundle.getBundle("org/elasticsearch/xpack/ml/transforms/exact", Locale.getDefault());
|
||||
Enumeration<String> keys = resource.getKeys();
|
||||
Map<String, String> exact = new HashMap<>(2048);
|
||||
while (keys.hasMoreElements()) {
|
||||
String key = keys.nextElement();
|
||||
String value = resource.getString(key);
|
||||
exact.put(key, value);
|
||||
}
|
||||
exact = Collections.unmodifiableMap(exact);
|
||||
|
||||
Map<String, Object> under = new HashMap<>(30);
|
||||
under.put("bd", "i");
|
||||
under.put("np", "i");
|
||||
under.put("jm", "i");
|
||||
under.put("fj", "i");
|
||||
under.put("fk", "i");
|
||||
under.put("ye", "i");
|
||||
under.put("sch.uk", "i");
|
||||
under.put("bn", "i");
|
||||
under.put("kitakyushu.jp", "i");
|
||||
under.put("kobe.jp", "i");
|
||||
under.put("ke", "i");
|
||||
under.put("sapporo.jp", "i");
|
||||
under.put("kh", "i");
|
||||
under.put("mm", "i");
|
||||
under.put("il", "i");
|
||||
under.put("yokohama.jp", "i");
|
||||
under.put("ck", "i");
|
||||
under.put("nagoya.jp", "i");
|
||||
under.put("sendai.jp", "i");
|
||||
under.put("kw", "i");
|
||||
under.put("er", "i");
|
||||
under.put("mz", "i");
|
||||
under.put("platform.sh", "p");
|
||||
under.put("gu", "i");
|
||||
under.put("nom.br", "i");
|
||||
under.put("zm", "i");
|
||||
under.put("pg", "i");
|
||||
under.put("ni", "i");
|
||||
under.put("kawasaki.jp", "i");
|
||||
under.put("zw", "i");
|
||||
under = Collections.unmodifiableMap(under);
|
||||
|
||||
Map<String, String> excluded = new HashMap<>(9);
|
||||
excluded.put("city.yokohama.jp", "i");
|
||||
excluded.put("teledata.mz", "i");
|
||||
excluded.put("city.kobe.jp", "i");
|
||||
excluded.put("city.sapporo.jp", "i");
|
||||
excluded.put("city.kawasaki.jp", "i");
|
||||
excluded.put("city.nagoya.jp", "i");
|
||||
excluded.put("www.ck", "i");
|
||||
excluded.put("city.sendai.jp", "i");
|
||||
excluded.put("city.kitakyushu.jp", "i");
|
||||
excluded = Collections.unmodifiableMap(excluded);
|
||||
|
||||
|
||||
paramsMap.put("excluded", excluded);
|
||||
paramsMap.put("under", under);
|
||||
paramsMap.put("exact", exact);
|
||||
params = Collections.unmodifiableMap(paramsMap);
|
||||
}
|
||||
|
||||
static {
|
||||
String fn = "String replaceDots(String input) {\n" +
|
||||
" String output = input;\n" +
|
||||
" if (output.indexOf('。') >= 0) {\n" +
|
||||
" output = output.replace('。', '.');\n" +
|
||||
" }\n" +
|
||||
" if (output.indexOf('.') >= 0) {\n" +
|
||||
" output = output.replace('.', '.');\n" +
|
||||
" }\n" +
|
||||
" if (output.indexOf('。') >= 0) {\n" +
|
||||
" output = output.replace('。', '.');\n" +
|
||||
" }\n" +
|
||||
" return output;\n" +
|
||||
"}\n" +
|
||||
"List split(String value) {\n" +
|
||||
" int nextWord = 0;\n" +
|
||||
" List splits = [];\n" +
|
||||
" for(int i = 0; i < value.length(); i++) {\n" +
|
||||
" if(value.charAt(i) == (char)'.') {\n" +
|
||||
" splits.add(value.substring(nextWord, i));\n" +
|
||||
" nextWord = i+1;\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
" if (nextWord != value.length()) {\n" +
|
||||
" splits.add(value.substring(nextWord, value.length()));\n" +
|
||||
" }\n" +
|
||||
" return splits;\n" +
|
||||
"}\n" +
|
||||
"List splitDomain(String domain) {\n" +
|
||||
" String dotDomain = replaceDots(domain);\n" +
|
||||
" return split(dotDomain);\n" +
|
||||
"}\n" +
|
||||
"boolean validateSyntax(List parts) {\n" +
|
||||
" int lastIndex = parts.length - 1;\n" +
|
||||
" /* Validate the last part specially, as it has different syntax rules. */\n" +
|
||||
" if (!validatePart(parts[lastIndex], true)) {\n" +
|
||||
" return false;\n" +
|
||||
" }\n" +
|
||||
" for (int i = 0; i < lastIndex; i++) {\n" +
|
||||
" String part = parts[i];\n" +
|
||||
" if (!validatePart(part, false)) {\n" +
|
||||
" return false;\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
" return true;\n" +
|
||||
"}\n" +
|
||||
"boolean validatePart(String part, boolean isFinalPart) {\n" +
|
||||
" int MAX_DOMAIN_PART_LENGTH = 63;\n" +
|
||||
" if (part.length() < 1 || part.length() > MAX_DOMAIN_PART_LENGTH) {\n" +
|
||||
" return false;\n" +
|
||||
" }\n" +
|
||||
" int offset = 0;\n" +
|
||||
" int strLen = part.length();\n" +
|
||||
" while (offset < strLen) {\n" +
|
||||
" int curChar = part.charAt(offset);\n" +
|
||||
" offset += 1;\n" +
|
||||
" if (!(Character.isLetterOrDigit(curChar) || curChar == (char)'-' || curChar == (char)'_')) {\n" +
|
||||
" return false;\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
" if (part.charAt(0) == (char)'-' || part.charAt(0) == (char)'_' ||\n" +
|
||||
" part.charAt(part.length() - 1) == (char)'-' || part.charAt(part.length() - 1) == (char)'_') {\n" +
|
||||
" return false;\n" +
|
||||
" }\n" +
|
||||
" if (isFinalPart && Character.isDigit(part.charAt(0))) {\n" +
|
||||
" return false;\n" +
|
||||
" }\n" +
|
||||
" return true;\n" +
|
||||
"}\n" +
|
||||
"int findPublicSuffix(Map params, List parts) {\n" +
|
||||
" int partsSize = parts.size();\n" +
|
||||
"\n" +
|
||||
" for (int i = 0; i < partsSize; i++) {\n" +
|
||||
" StringJoiner joiner = new StringJoiner('.');\n" +
|
||||
" for (String s : parts.subList(i, partsSize)) {\n" +
|
||||
" joiner.add(s);\n" +
|
||||
" }\n" +
|
||||
" /* parts.subList(i, partsSize).each(joiner::add); */\n" +
|
||||
" String ancestorName = joiner.toString();\n" +
|
||||
"\n" +
|
||||
" if (params['exact'].containsKey(ancestorName)) {\n" +
|
||||
" return i;\n" +
|
||||
" }\n" +
|
||||
"\n" +
|
||||
" /* Excluded domains (e.g. !nhs.uk) use the next highest\n" +
|
||||
" domain as the effective public suffix (e.g. uk). */\n" +
|
||||
"\n" +
|
||||
" if (params['excluded'].containsKey(ancestorName)) {\n" +
|
||||
" return i + 1;\n" +
|
||||
" }\n" +
|
||||
"\n" +
|
||||
" List pieces = split(ancestorName);\n" +
|
||||
" if (pieces.length >= 2 && params['under'].containsKey(pieces[1])) {\n" +
|
||||
" return i;\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
"\n" +
|
||||
" return -1;\n" +
|
||||
"}\n" +
|
||||
"String ancestor(List parts, int levels) {\n" +
|
||||
" StringJoiner joiner = new StringJoiner('.');\n" +
|
||||
" for (String s : parts.subList(levels, parts.size())) {\n" +
|
||||
" joiner.add(s);\n" +
|
||||
" }\n" +
|
||||
" String name = joiner.toString();\n" +
|
||||
" if (name.endsWith('.')) {\n" +
|
||||
" name = name.substring(0, name.length() - 1);\n" +
|
||||
" }\n" +
|
||||
" return name;\n" +
|
||||
"}\n" +
|
||||
"String topPrivateDomain(String name, List parts, int publicSuffixIndex) {\n" +
|
||||
" if (publicSuffixIndex == 1) {\n" +
|
||||
" return name;\n" +
|
||||
" }\n" +
|
||||
" if (!(publicSuffixIndex > 0)) {\n" +
|
||||
" throw new IllegalArgumentException('Not under a public suffix: ' + name);\n" +
|
||||
" }\n" +
|
||||
" return ancestor(parts, publicSuffixIndex - 1);\n" +
|
||||
"}\n" +
|
||||
"List domainSplit(String host, Map params) {\n" +
|
||||
" int MAX_DNS_NAME_LENGTH = 253;\n" +
|
||||
" int MAX_LENGTH = 253;\n" +
|
||||
" int MAX_PARTS = 127;\n" +
|
||||
" if ('host'.isEmpty()) {\n" +
|
||||
" return ['',''];\n" +
|
||||
" }\n" +
|
||||
" host = host.trim();\n" +
|
||||
" if (host.contains(':')) {\n" +
|
||||
" return ['', host];\n" +
|
||||
" }\n" +
|
||||
" boolean tentativeIP = true;\n" +
|
||||
" for(int i = 0; i < host.length(); i++) {\n" +
|
||||
" if (!(Character.isDigit(host.charAt(i)) || host.charAt(i) == (char)'.')) {\n" +
|
||||
" tentativeIP = false;\n" +
|
||||
" break;\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
" if (tentativeIP) {\n" +
|
||||
" /* special-snowflake rules now... */\n" +
|
||||
" if (host == '.') {\n" +
|
||||
" return ['',''];\n" +
|
||||
" }\n" +
|
||||
" return ['', host];\n" +
|
||||
" }\n" +
|
||||
" def normalizedHost = host;\n" +
|
||||
" normalizedHost = normalizedHost.toLowerCase();\n" +
|
||||
" List parts = splitDomain(normalizedHost);\n" +
|
||||
" int publicSuffixIndex = findPublicSuffix(params, parts);\n" +
|
||||
" if (publicSuffixIndex == 0) {\n" +
|
||||
" return ['', host];\n" +
|
||||
" }\n" +
|
||||
" String highestRegistered = '';\n" +
|
||||
" /* for the case where the host is internal like .local so is not a recognised public suffix */\n" +
|
||||
" if (publicSuffixIndex == -1) {\n" +
|
||||
" if (!parts.isEmpty()) {\n" +
|
||||
" if (parts.size() == 1) {\n" +
|
||||
" return ['', host];\n" +
|
||||
" }\n" +
|
||||
" if (parts.size() > 2) {\n" +
|
||||
" boolean allNumeric = true;\n" +
|
||||
" String value = parts.get(parts.size() - 1);\n" +
|
||||
" for (int i = 0; i < value.length(); i++) {\n" +
|
||||
" if (!Character.isDigit(value.charAt(i))) {\n" +
|
||||
" allNumeric = false;\n" +
|
||||
" break;\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
" if (allNumeric) {\n" +
|
||||
" highestRegistered = parts.get(parts.size() - 2) + '.' + parts.get(parts.size() - 1);\n" +
|
||||
" } else {\n" +
|
||||
" highestRegistered = parts.get(parts.size() - 1);\n" +
|
||||
" }\n" +
|
||||
"\n" +
|
||||
" } else {\n" +
|
||||
" highestRegistered = parts.get(parts.size() - 1);\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
" } else {\n" +
|
||||
" /* HRD is the top private domain */\n" +
|
||||
" highestRegistered = topPrivateDomain(normalizedHost, parts, publicSuffixIndex);\n" +
|
||||
" }\n" +
|
||||
" String subDomain = host.substring(0, host.length() - highestRegistered.length());\n" +
|
||||
" if (subDomain.endsWith('.')) {\n" +
|
||||
" subDomain = subDomain.substring(0, subDomain.length() - 1);\n" +
|
||||
" }\n" +
|
||||
" return [subDomain, highestRegistered];\n" +
|
||||
"}\n";
|
||||
fn = fn.replace("\n","");
|
||||
function = fn;
|
||||
}
|
||||
}
|
|
@ -1,187 +0,0 @@
|
|||
String replaceDots(String input) {
|
||||
String output = input;
|
||||
if (output.indexOf('。') >= 0) {
|
||||
output = output.replace('。', '.');
|
||||
}
|
||||
if (output.indexOf('.') >= 0) {
|
||||
output = output.replace('.', '.');
|
||||
}
|
||||
if (output.indexOf('。') >= 0) {
|
||||
output = output.replace('。', '.');
|
||||
}
|
||||
return output;
|
||||
}
|
||||
List split(String value) {
|
||||
int nextWord = 0;
|
||||
List splits = [];
|
||||
for(int i = 0; i < value.length(); i++) {
|
||||
if(value.charAt(i) == (char)'.') {
|
||||
splits.add(value.substring(nextWord, i));
|
||||
nextWord = i+1;
|
||||
}
|
||||
}
|
||||
if (nextWord != value.length()) {
|
||||
splits.add(value.substring(nextWord, value.length()));
|
||||
}
|
||||
return splits;
|
||||
}
|
||||
List splitDomain(String domain) {
|
||||
String dotDomain = replaceDots(domain);
|
||||
return split(dotDomain);
|
||||
}
|
||||
boolean validateSyntax(List parts) {
|
||||
int lastIndex = parts.length - 1;
|
||||
/* Validate the last part specially, as it has different syntax rules. */
|
||||
if (!validatePart(parts[lastIndex], true)) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < lastIndex; i++) {
|
||||
String part = parts[i];
|
||||
if (!validatePart(part, false)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
boolean validatePart(String part, boolean isFinalPart) {
|
||||
int MAX_DOMAIN_PART_LENGTH = 63;
|
||||
if (part.length() < 1 || part.length() > MAX_DOMAIN_PART_LENGTH) {
|
||||
return false;
|
||||
}
|
||||
int offset = 0;
|
||||
int strLen = part.length();
|
||||
while (offset < strLen) {
|
||||
int curChar = part.charAt(offset);
|
||||
offset += 1;
|
||||
if (!(Character.isLetterOrDigit(curChar) || curChar == (char)'-' || curChar == (char)'_')) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (part.charAt(0) == (char)'-' || part.charAt(0) == (char)'_' ||
|
||||
part.charAt(part.length() - 1) == (char)'-' || part.charAt(part.length() - 1) == (char)'_') {
|
||||
return false;
|
||||
}
|
||||
if (isFinalPart && Character.isDigit(part.charAt(0))) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
int findPublicSuffix(Map params, List parts) {
|
||||
int partsSize = parts.size();
|
||||
|
||||
for (int i = 0; i < partsSize; i++) {
|
||||
StringJoiner joiner = new StringJoiner('.');
|
||||
for (String s : parts.subList(i, partsSize)) {
|
||||
joiner.add(s);
|
||||
}
|
||||
/* parts.subList(i, partsSize).each(joiner::add); */
|
||||
String ancestorName = joiner.toString();
|
||||
|
||||
if (params['exact'].containsKey(ancestorName)) {
|
||||
return i;
|
||||
}
|
||||
|
||||
/* Excluded domains (e.g. !nhs.uk) use the next highest
|
||||
domain as the effective public suffix (e.g. uk). */
|
||||
|
||||
if (params['excluded'].containsKey(ancestorName)) {
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
List pieces = split(ancestorName);
|
||||
if (pieces.length >= 2 && params['under'].containsKey(pieces[1])) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
String ancestor(List parts, int levels) {
|
||||
StringJoiner joiner = new StringJoiner('.');
|
||||
for (String s : parts.subList(levels, parts.size())) {
|
||||
joiner.add(s);
|
||||
}
|
||||
String name = joiner.toString();
|
||||
if (name.endsWith('.')) {
|
||||
name = name.substring(0, name.length() - 1);
|
||||
}
|
||||
return name;
|
||||
}
|
||||
String topPrivateDomain(String name, List parts, int publicSuffixIndex) {
|
||||
if (publicSuffixIndex == 1) {
|
||||
return name;
|
||||
}
|
||||
if (!(publicSuffixIndex > 0)) {
|
||||
throw new IllegalArgumentException('Not under a public suffix: ' + name);
|
||||
}
|
||||
return ancestor(parts, publicSuffixIndex - 1);
|
||||
}
|
||||
List domainSplit(String host, Map params) {
|
||||
int MAX_DNS_NAME_LENGTH = 253;
|
||||
int MAX_LENGTH = 253;
|
||||
int MAX_PARTS = 127;
|
||||
if ('host'.isEmpty()) {
|
||||
return ['',''];
|
||||
}
|
||||
host = host.trim();
|
||||
if (host.contains(':')) {
|
||||
return ['', host];
|
||||
}
|
||||
boolean tentativeIP = true;
|
||||
for(int i = 0; i < host.length(); i++) {
|
||||
if (!(Character.isDigit(host.charAt(i)) || host.charAt(i) == (char)'.')) {
|
||||
tentativeIP = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (tentativeIP) {
|
||||
/* special-snowflake rules now... */
|
||||
if (host == '.') {
|
||||
return ['',''];
|
||||
}
|
||||
return ['', host];
|
||||
}
|
||||
def normalizedHost = host;
|
||||
normalizedHost = normalizedHost.toLowerCase();
|
||||
List parts = splitDomain(normalizedHost);
|
||||
int publicSuffixIndex = findPublicSuffix(params, parts);
|
||||
if (publicSuffixIndex == 0) {
|
||||
return ['', host];
|
||||
}
|
||||
String highestRegistered = '';
|
||||
/* for the case where the host is internal like .local so is not a recognised public suffix */
|
||||
if (publicSuffixIndex == -1) {
|
||||
if (!parts.isEmpty()) {
|
||||
if (parts.size() == 1) {
|
||||
return ['', host];
|
||||
}
|
||||
if (parts.size() > 2) {
|
||||
boolean allNumeric = true;
|
||||
String value = parts.get(parts.size() - 1);
|
||||
for (int i = 0; i < value.length(); i++) {
|
||||
if (!Character.isDigit(value.charAt(i))) {
|
||||
allNumeric = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (allNumeric) {
|
||||
highestRegistered = parts.get(parts.size() - 2) + '.' + parts.get(parts.size() - 1);
|
||||
} else {
|
||||
highestRegistered = parts.get(parts.size() - 1);
|
||||
}
|
||||
|
||||
} else {
|
||||
highestRegistered = parts.get(parts.size() - 1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* HRD is the top private domain */
|
||||
highestRegistered = topPrivateDomain(normalizedHost, parts, publicSuffixIndex);
|
||||
}
|
||||
String subDomain = host.substring(0, host.length() - highestRegistered.length());
|
||||
if (subDomain.endsWith('.')) {
|
||||
subDomain = subDomain.substring(0, subDomain.length() - 1);
|
||||
}
|
||||
return [subDomain, highestRegistered];
|
||||
}
|
||||
|
|
@ -23,6 +23,7 @@ import java.util.Arrays;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
public class DatafeedConfigTests extends AbstractSerializingTestCase<DatafeedConfig> {
|
||||
|
@ -253,6 +254,30 @@ public class DatafeedConfigTests extends AbstractSerializingTestCase<DatafeedCon
|
|||
assertThat(e.getMessage(), equalTo("script_fields cannot be used in combination with aggregations"));
|
||||
}
|
||||
|
||||
public void testDomainSplitInjection() {
|
||||
DatafeedConfig.Builder datafeed = new DatafeedConfig.Builder("datafeed1", "job1");
|
||||
datafeed.setIndexes(Arrays.asList("my_index"));
|
||||
datafeed.setTypes(Arrays.asList("my_type"));
|
||||
|
||||
SearchSourceBuilder.ScriptField withoutSplit = new SearchSourceBuilder.ScriptField(
|
||||
"script1", new Script("return 1+1;"), false);
|
||||
SearchSourceBuilder.ScriptField withSplit = new SearchSourceBuilder.ScriptField(
|
||||
"script2", new Script("return domainSplit('foo.com', params);"), false);
|
||||
datafeed.setScriptFields(Arrays.asList(withoutSplit, withSplit));
|
||||
|
||||
DatafeedConfig config = datafeed.build();
|
||||
List<SearchSourceBuilder.ScriptField> scriptFields = config.getScriptFields();
|
||||
|
||||
assertThat(scriptFields.size(), equalTo(2));
|
||||
assertThat(scriptFields.get(0).fieldName(), equalTo("script1"));
|
||||
assertThat(scriptFields.get(0).script().getIdOrCode(), equalTo("return 1+1;"));
|
||||
assertFalse(scriptFields.get(0).script().getParams().containsKey("exact"));
|
||||
|
||||
assertThat(scriptFields.get(1).fieldName(), equalTo("script2"));
|
||||
assertThat(scriptFields.get(1).script().getIdOrCode(), containsString("List domainSplit(String host, Map params)"));
|
||||
assertTrue(scriptFields.get(1).script().getParams().containsKey("exact"));
|
||||
}
|
||||
|
||||
public static String randomValidDatafeedId() {
|
||||
CodepointSetGenerator generator = new CodepointSetGenerator("abcdefghijklmnopqrstuvwxyz".toCharArray());
|
||||
return generator.ofCodePointsLength(random(), 10, 10);
|
||||
|
|
|
@ -13,17 +13,17 @@ import org.elasticsearch.client.Response;
|
|||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.test.StreamsUtils;
|
||||
import org.elasticsearch.test.rest.ESRestTestCase;
|
||||
import org.elasticsearch.xpack.ml.MlPlugin;
|
||||
import org.elasticsearch.xpack.ml.utils.DomainSplitFunction;
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.ResourceBundle;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -32,88 +32,6 @@ import static org.hamcrest.Matchers.equalTo;
|
|||
|
||||
public class PainlessDomainSplitIT extends ESRestTestCase {
|
||||
|
||||
private static final Map<String, String> EXCLUDED;
|
||||
|
||||
static {
|
||||
EXCLUDED = new HashMap<>();
|
||||
EXCLUDED.put("city.yokohama.jp", "i");
|
||||
EXCLUDED.put("teledata.mz", "i");
|
||||
EXCLUDED.put("city.kobe.jp", "i");
|
||||
EXCLUDED.put("city.sapporo.jp", "i");
|
||||
EXCLUDED.put("city.kawasaki.jp", "i");
|
||||
EXCLUDED.put("city.nagoya.jp", "i");
|
||||
EXCLUDED.put("www.ck", "i");
|
||||
EXCLUDED.put("city.sendai.jp", "i");
|
||||
EXCLUDED.put("city.kitakyushu.jp", "i");
|
||||
}
|
||||
|
||||
private static final Map<String, String> UNDER;
|
||||
|
||||
static {
|
||||
UNDER = new HashMap<>();
|
||||
UNDER.put("bd", "i");
|
||||
UNDER.put("np", "i");
|
||||
UNDER.put("jm", "i");
|
||||
UNDER.put("fj", "i");
|
||||
UNDER.put("fk", "i");
|
||||
UNDER.put("ye", "i");
|
||||
UNDER.put("sch.uk", "i");
|
||||
UNDER.put("bn", "i");
|
||||
UNDER.put("kitakyushu.jp", "i");
|
||||
UNDER.put("kobe.jp", "i");
|
||||
UNDER.put("ke", "i");
|
||||
UNDER.put("sapporo.jp", "i");
|
||||
UNDER.put("kh", "i");
|
||||
UNDER.put("mm", "i");
|
||||
UNDER.put("il", "i");
|
||||
UNDER.put("yokohama.jp", "i");
|
||||
UNDER.put("ck", "i");
|
||||
UNDER.put("nagoya.jp", "i");
|
||||
UNDER.put("sendai.jp", "i");
|
||||
UNDER.put("kw", "i");
|
||||
UNDER.put("er", "i");
|
||||
UNDER.put("mz", "i");
|
||||
UNDER.put("platform.sh", "p");
|
||||
UNDER.put("gu", "i");
|
||||
UNDER.put("nom.br", "i");
|
||||
UNDER.put("zm", "i");
|
||||
UNDER.put("pg", "i");
|
||||
UNDER.put("ni", "i");
|
||||
UNDER.put("kawasaki.jp", "i");
|
||||
UNDER.put("zw", "i");
|
||||
}
|
||||
|
||||
public static final Map<String, Object> params;
|
||||
|
||||
static {
|
||||
params = new HashMap<>();
|
||||
|
||||
ResourceBundle resource = ResourceBundle.getBundle("org/elasticsearch/xpack/ml/transforms/exact", Locale.getDefault());
|
||||
Enumeration keys = resource.getKeys();
|
||||
Map<String, String> exact = new HashMap<>(2048);
|
||||
while (keys.hasMoreElements()) {
|
||||
String key = (String) keys.nextElement();
|
||||
String value = resource.getString(key);
|
||||
exact.put(key, value);
|
||||
}
|
||||
params.put("excluded", EXCLUDED);
|
||||
params.put("under", UNDER);
|
||||
params.put("exact", exact);
|
||||
}
|
||||
|
||||
static String domainSplitFunction;
|
||||
|
||||
static {
|
||||
|
||||
try {
|
||||
domainSplitFunction = StreamsUtils.copyToStringFromClasspath("/org/elasticsearch/xpack/ml/transforms/script.painless")
|
||||
.replace("\n", "");
|
||||
} catch (Exception e) {
|
||||
domainSplitFunction = "";
|
||||
fail(e.toString());
|
||||
}
|
||||
}
|
||||
|
||||
static class TestConfiguration {
|
||||
public String subDomainExpected;
|
||||
public String domainExpected;
|
||||
|
@ -272,7 +190,12 @@ public class PainlessDomainSplitIT extends ESRestTestCase {
|
|||
new StringEntity("{ \"settings\": " + Strings.toString(settings) + " }")));
|
||||
}
|
||||
|
||||
public void testBasics() throws Exception {
|
||||
private void createIndex(String name, Settings settings, String mapping) throws IOException {
|
||||
assertOK(client().performRequest("PUT", name, Collections.emptyMap(),
|
||||
new StringEntity("{ \"settings\": " + Strings.toString(settings) + ", \"mappings\" : {" + mapping + "} }")));
|
||||
}
|
||||
|
||||
public void testIsolated() throws Exception {
|
||||
|
||||
Settings.Builder settings = Settings.builder()
|
||||
.put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
|
||||
|
@ -284,6 +207,8 @@ public class PainlessDomainSplitIT extends ESRestTestCase {
|
|||
|
||||
Pattern pattern = Pattern.compile("domain_split\":\\[(.*?),(.*?)\\]");
|
||||
|
||||
Map<String, Object> params = new HashMap<>(DomainSplitFunction.params.size() + 1);
|
||||
params.putAll(DomainSplitFunction.params);
|
||||
for (TestConfiguration testConfig : tests) {
|
||||
params.put("host", testConfig.hostName);
|
||||
String mapAsJson = new ObjectMapper().writeValueAsString(params);
|
||||
|
@ -296,7 +221,7 @@ public class PainlessDomainSplitIT extends ESRestTestCase {
|
|||
" \"domain_split\" : {\n" +
|
||||
" \"script\" : {\n" +
|
||||
" \"lang\": \"painless\",\n" +
|
||||
" \"inline\": \"" + domainSplitFunction + " return domainSplit(params['host'], params); \",\n" +
|
||||
" \"inline\": \"" + DomainSplitFunction.function + " return domainSplit(params['host'], params); \",\n" +
|
||||
" \"params\": " + mapAsJson + "\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
|
@ -326,6 +251,117 @@ public class PainlessDomainSplitIT extends ESRestTestCase {
|
|||
assertThat("Expected domain [" + testConfig.domainExpected + "] but found [" + actualDomain + "]. Actual "
|
||||
+ actualTotal + " vs Expected " + expectedTotal, actualDomain, equalTo(testConfig.domainExpected));
|
||||
}
|
||||
}
|
||||
|
||||
public void testHRDSplit() throws Exception {
|
||||
|
||||
// Create job
|
||||
String job = "{\n" +
|
||||
" \"description\":\"Domain splitting\",\n" +
|
||||
" \"analysis_config\" : {\n" +
|
||||
" \"bucket_span\":3600,\n" +
|
||||
" \"detectors\" :[{\"function\":\"count\", \"by_field_name\" : \"domain_split\"}]\n" +
|
||||
" },\n" +
|
||||
" \"data_description\" : {\n" +
|
||||
" \"field_delimiter\":\",\",\n" +
|
||||
" \"time_field\":\"time\"\n" +
|
||||
" \n" +
|
||||
" }\n" +
|
||||
" }";
|
||||
|
||||
client().performRequest("PUT", MlPlugin.BASE_PATH + "anomaly_detectors/painless", Collections.emptyMap(), new StringEntity(job));
|
||||
client().performRequest("POST", MlPlugin.BASE_PATH + "anomaly_detectors/painless/_open");
|
||||
|
||||
// Create index to hold data
|
||||
Settings.Builder settings = Settings.builder()
|
||||
.put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
|
||||
.put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0);
|
||||
|
||||
createIndex("painless", settings.build(), "\"test\": { \"properties\": { \"domain\": { \"type\": \"keyword\" },\"time\": { " +
|
||||
"\"type\": \"date\" } } }");
|
||||
|
||||
// Index some data
|
||||
DateTime baseTime = new DateTime().minusYears(1);
|
||||
TestConfiguration test = tests.get(randomInt(tests.size()-1));
|
||||
Pattern pattern = Pattern.compile("by_field_value\":\"(" + test.subDomainExpected + "),(" + test.domainExpected +")\",");
|
||||
for (int i = 0; i < 100; i++) {
|
||||
|
||||
DateTime time = baseTime.plusHours(i);
|
||||
if (i == 64) {
|
||||
// Anomaly has 100 docs, but we don't care about the value
|
||||
for (int j = 0; j < 100; j++) {
|
||||
client().performRequest("PUT", "painless/test/" + time.toDateTimeISO() + "_" + j,
|
||||
Collections.emptyMap(),
|
||||
new StringEntity("{\"domain\": \"" + "bar.bar.com\", \"time\": \"" + time.toDateTimeISO() + "\"}"));
|
||||
}
|
||||
} else {
|
||||
// Non-anomalous values will be what's seen when the anomaly is reported
|
||||
client().performRequest("PUT", "painless/test/" + time.toDateTimeISO(),
|
||||
Collections.emptyMap(),
|
||||
new StringEntity("{\"domain\": \"" + test.hostName + "\", \"time\": \"" + time.toDateTimeISO() + "\"}"));
|
||||
}
|
||||
}
|
||||
|
||||
client().performRequest("POST", "painless/_refresh");
|
||||
|
||||
// Create and start datafeed
|
||||
String body = "{\n" +
|
||||
" \"job_id\":\"painless\",\n" +
|
||||
" \"indexes\":[\"painless\"],\n" +
|
||||
" \"types\":[\"test\"],\n" +
|
||||
" \"script_fields\": {\n" +
|
||||
" \"domain_split\": {\n" +
|
||||
" \"script\": \"return domainSplit(doc['domain'].value, params);\"\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
" }";
|
||||
|
||||
client().performRequest("PUT", MlPlugin.BASE_PATH + "datafeeds/painless", Collections.emptyMap(), new StringEntity(body));
|
||||
client().performRequest("POST", MlPlugin.BASE_PATH + "datafeeds/painless/_start");
|
||||
|
||||
boolean passed = awaitBusy(() -> {
|
||||
try {
|
||||
client().performRequest("POST", "/_refresh");
|
||||
|
||||
Response response = client().performRequest("GET", MlPlugin.BASE_PATH + "anomaly_detectors/painless/results/records");
|
||||
String responseBody = EntityUtils.toString(response.getEntity());
|
||||
|
||||
if (responseBody.contains("\"count\":2")) {
|
||||
Matcher m = pattern.matcher(responseBody);
|
||||
|
||||
String actualSubDomain = "";
|
||||
String actualDomain = "";
|
||||
if (m.find()) {
|
||||
actualSubDomain = m.group(1).replace("\"", "");
|
||||
actualDomain = m.group(2).replace("\"", "");
|
||||
}
|
||||
|
||||
String expectedTotal = "[" + test.subDomainExpected + "," + test.domainExpected + "]";
|
||||
String actualTotal = "[" + actualSubDomain + "," + actualDomain + "]";
|
||||
|
||||
// domainSplit() tests had subdomain, testHighestRegisteredDomainCases() do not
|
||||
if (test.subDomainExpected != null) {
|
||||
assertThat("Expected subdomain [" + test.subDomainExpected + "] but found [" + actualSubDomain + "]. Actual "
|
||||
+ actualTotal + " vs Expected " + expectedTotal, actualSubDomain, equalTo(test.subDomainExpected));
|
||||
}
|
||||
|
||||
assertThat("Expected domain [" + test.domainExpected + "] but found [" + actualDomain + "]. Actual "
|
||||
+ actualTotal + " vs Expected " + expectedTotal, actualDomain, equalTo(test.domainExpected));
|
||||
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error(e.getMessage());
|
||||
return false;
|
||||
}
|
||||
|
||||
}, 5, TimeUnit.SECONDS);
|
||||
|
||||
if (!passed) {
|
||||
fail("Anomaly records were not found within 5 seconds");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue