diff --git a/elasticsearch/src/main/java/org/elasticsearch/xpack/ml/datafeed/DatafeedConfig.java b/elasticsearch/src/main/java/org/elasticsearch/xpack/ml/datafeed/DatafeedConfig.java index 17b306c2c08..ad2e6337c41 100644 --- a/elasticsearch/src/main/java/org/elasticsearch/xpack/ml/datafeed/DatafeedConfig.java +++ b/elasticsearch/src/main/java/org/elasticsearch/xpack/ml/datafeed/DatafeedConfig.java @@ -18,10 +18,12 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.QueryParseContext; +import org.elasticsearch.script.Script; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.xpack.ml.job.config.Job; import org.elasticsearch.xpack.ml.job.messages.Messages; +import org.elasticsearch.xpack.ml.utils.DomainSplitFunction; import org.elasticsearch.xpack.ml.utils.ExceptionsHelper; import org.elasticsearch.xpack.ml.utils.MlStrings; @@ -29,7 +31,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; /** @@ -401,7 +405,27 @@ public class DatafeedConfig extends AbstractDiffable implements } public void setScriptFields(List scriptFields) { - List sorted = new ArrayList<>(scriptFields); + List sorted = new ArrayList<>(); + for (SearchSourceBuilder.ScriptField scriptField : scriptFields) { + String script = scriptField.script().getIdOrCode(); + + if (script.contains("domainSplit(")) { + String modifiedCode = DomainSplitFunction.function + "\n" + script; + Map modifiedParams = new HashMap<>(scriptField.script().getParams().size() + + DomainSplitFunction.params.size()); + + modifiedParams.putAll(scriptField.script().getParams()); + modifiedParams.putAll(DomainSplitFunction.params); + + Script newScript = new Script(scriptField.script().getType(), scriptField.script().getLang(), + modifiedCode, modifiedParams); + + sorted.add(new SearchSourceBuilder.ScriptField(scriptField.fieldName(), newScript, scriptField.ignoreFailure())); + } else { + sorted.add(scriptField); + } + + } sorted.sort(Comparator.comparing(SearchSourceBuilder.ScriptField::fieldName)); this.scriptFields = sorted; } diff --git a/elasticsearch/src/main/java/org/elasticsearch/xpack/ml/utils/DomainSplitFunction.java b/elasticsearch/src/main/java/org/elasticsearch/xpack/ml/utils/DomainSplitFunction.java new file mode 100644 index 00000000000..22539124dcf --- /dev/null +++ b/elasticsearch/src/main/java/org/elasticsearch/xpack/ml/utils/DomainSplitFunction.java @@ -0,0 +1,277 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.utils; + +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.ResourceBundle; + +public final class DomainSplitFunction { + + public static final String function; + public static final Map params; + + DomainSplitFunction() {} + + static { + Map paramsMap = new HashMap<>(); + + ResourceBundle resource = ResourceBundle.getBundle("org/elasticsearch/xpack/ml/transforms/exact", Locale.getDefault()); + Enumeration keys = resource.getKeys(); + Map exact = new HashMap<>(2048); + while (keys.hasMoreElements()) { + String key = keys.nextElement(); + String value = resource.getString(key); + exact.put(key, value); + } + exact = Collections.unmodifiableMap(exact); + + Map under = new HashMap<>(30); + under.put("bd", "i"); + under.put("np", "i"); + under.put("jm", "i"); + under.put("fj", "i"); + under.put("fk", "i"); + under.put("ye", "i"); + under.put("sch.uk", "i"); + under.put("bn", "i"); + under.put("kitakyushu.jp", "i"); + under.put("kobe.jp", "i"); + under.put("ke", "i"); + under.put("sapporo.jp", "i"); + under.put("kh", "i"); + under.put("mm", "i"); + under.put("il", "i"); + under.put("yokohama.jp", "i"); + under.put("ck", "i"); + under.put("nagoya.jp", "i"); + under.put("sendai.jp", "i"); + under.put("kw", "i"); + under.put("er", "i"); + under.put("mz", "i"); + under.put("platform.sh", "p"); + under.put("gu", "i"); + under.put("nom.br", "i"); + under.put("zm", "i"); + under.put("pg", "i"); + under.put("ni", "i"); + under.put("kawasaki.jp", "i"); + under.put("zw", "i"); + under = Collections.unmodifiableMap(under); + + Map excluded = new HashMap<>(9); + excluded.put("city.yokohama.jp", "i"); + excluded.put("teledata.mz", "i"); + excluded.put("city.kobe.jp", "i"); + excluded.put("city.sapporo.jp", "i"); + excluded.put("city.kawasaki.jp", "i"); + excluded.put("city.nagoya.jp", "i"); + excluded.put("www.ck", "i"); + excluded.put("city.sendai.jp", "i"); + excluded.put("city.kitakyushu.jp", "i"); + excluded = Collections.unmodifiableMap(excluded); + + + paramsMap.put("excluded", excluded); + paramsMap.put("under", under); + paramsMap.put("exact", exact); + params = Collections.unmodifiableMap(paramsMap); + } + + static { + String fn = "String replaceDots(String input) {\n" + + " String output = input;\n" + + " if (output.indexOf('。') >= 0) {\n" + + " output = output.replace('。', '.');\n" + + " }\n" + + " if (output.indexOf('.') >= 0) {\n" + + " output = output.replace('.', '.');\n" + + " }\n" + + " if (output.indexOf('。') >= 0) {\n" + + " output = output.replace('。', '.');\n" + + " }\n" + + " return output;\n" + + "}\n" + + "List split(String value) {\n" + + " int nextWord = 0;\n" + + " List splits = [];\n" + + " for(int i = 0; i < value.length(); i++) {\n" + + " if(value.charAt(i) == (char)'.') {\n" + + " splits.add(value.substring(nextWord, i));\n" + + " nextWord = i+1;\n" + + " }\n" + + " }\n" + + " if (nextWord != value.length()) {\n" + + " splits.add(value.substring(nextWord, value.length()));\n" + + " }\n" + + " return splits;\n" + + "}\n" + + "List splitDomain(String domain) {\n" + + " String dotDomain = replaceDots(domain);\n" + + " return split(dotDomain);\n" + + "}\n" + + "boolean validateSyntax(List parts) {\n" + + " int lastIndex = parts.length - 1;\n" + + " /* Validate the last part specially, as it has different syntax rules. */\n" + + " if (!validatePart(parts[lastIndex], true)) {\n" + + " return false;\n" + + " }\n" + + " for (int i = 0; i < lastIndex; i++) {\n" + + " String part = parts[i];\n" + + " if (!validatePart(part, false)) {\n" + + " return false;\n" + + " }\n" + + " }\n" + + " return true;\n" + + "}\n" + + "boolean validatePart(String part, boolean isFinalPart) {\n" + + " int MAX_DOMAIN_PART_LENGTH = 63;\n" + + " if (part.length() < 1 || part.length() > MAX_DOMAIN_PART_LENGTH) {\n" + + " return false;\n" + + " }\n" + + " int offset = 0;\n" + + " int strLen = part.length();\n" + + " while (offset < strLen) {\n" + + " int curChar = part.charAt(offset);\n" + + " offset += 1;\n" + + " if (!(Character.isLetterOrDigit(curChar) || curChar == (char)'-' || curChar == (char)'_')) {\n" + + " return false;\n" + + " }\n" + + " }\n" + + " if (part.charAt(0) == (char)'-' || part.charAt(0) == (char)'_' ||\n" + + " part.charAt(part.length() - 1) == (char)'-' || part.charAt(part.length() - 1) == (char)'_') {\n" + + " return false;\n" + + " }\n" + + " if (isFinalPart && Character.isDigit(part.charAt(0))) {\n" + + " return false;\n" + + " }\n" + + " return true;\n" + + "}\n" + + "int findPublicSuffix(Map params, List parts) {\n" + + " int partsSize = parts.size();\n" + + "\n" + + " for (int i = 0; i < partsSize; i++) {\n" + + " StringJoiner joiner = new StringJoiner('.');\n" + + " for (String s : parts.subList(i, partsSize)) {\n" + + " joiner.add(s);\n" + + " }\n" + + " /* parts.subList(i, partsSize).each(joiner::add); */\n" + + " String ancestorName = joiner.toString();\n" + + "\n" + + " if (params['exact'].containsKey(ancestorName)) {\n" + + " return i;\n" + + " }\n" + + "\n" + + " /* Excluded domains (e.g. !nhs.uk) use the next highest\n" + + " domain as the effective public suffix (e.g. uk). */\n" + + "\n" + + " if (params['excluded'].containsKey(ancestorName)) {\n" + + " return i + 1;\n" + + " }\n" + + "\n" + + " List pieces = split(ancestorName);\n" + + " if (pieces.length >= 2 && params['under'].containsKey(pieces[1])) {\n" + + " return i;\n" + + " }\n" + + " }\n" + + "\n" + + " return -1;\n" + + "}\n" + + "String ancestor(List parts, int levels) {\n" + + " StringJoiner joiner = new StringJoiner('.');\n" + + " for (String s : parts.subList(levels, parts.size())) {\n" + + " joiner.add(s);\n" + + " }\n" + + " String name = joiner.toString();\n" + + " if (name.endsWith('.')) {\n" + + " name = name.substring(0, name.length() - 1);\n" + + " }\n" + + " return name;\n" + + "}\n" + + "String topPrivateDomain(String name, List parts, int publicSuffixIndex) {\n" + + " if (publicSuffixIndex == 1) {\n" + + " return name;\n" + + " }\n" + + " if (!(publicSuffixIndex > 0)) {\n" + + " throw new IllegalArgumentException('Not under a public suffix: ' + name);\n" + + " }\n" + + " return ancestor(parts, publicSuffixIndex - 1);\n" + + "}\n" + + "List domainSplit(String host, Map params) {\n" + + " int MAX_DNS_NAME_LENGTH = 253;\n" + + " int MAX_LENGTH = 253;\n" + + " int MAX_PARTS = 127;\n" + + " if ('host'.isEmpty()) {\n" + + " return ['',''];\n" + + " }\n" + + " host = host.trim();\n" + + " if (host.contains(':')) {\n" + + " return ['', host];\n" + + " }\n" + + " boolean tentativeIP = true;\n" + + " for(int i = 0; i < host.length(); i++) {\n" + + " if (!(Character.isDigit(host.charAt(i)) || host.charAt(i) == (char)'.')) {\n" + + " tentativeIP = false;\n" + + " break;\n" + + " }\n" + + " }\n" + + " if (tentativeIP) {\n" + + " /* special-snowflake rules now... */\n" + + " if (host == '.') {\n" + + " return ['',''];\n" + + " }\n" + + " return ['', host];\n" + + " }\n" + + " def normalizedHost = host;\n" + + " normalizedHost = normalizedHost.toLowerCase();\n" + + " List parts = splitDomain(normalizedHost);\n" + + " int publicSuffixIndex = findPublicSuffix(params, parts);\n" + + " if (publicSuffixIndex == 0) {\n" + + " return ['', host];\n" + + " }\n" + + " String highestRegistered = '';\n" + + " /* for the case where the host is internal like .local so is not a recognised public suffix */\n" + + " if (publicSuffixIndex == -1) {\n" + + " if (!parts.isEmpty()) {\n" + + " if (parts.size() == 1) {\n" + + " return ['', host];\n" + + " }\n" + + " if (parts.size() > 2) {\n" + + " boolean allNumeric = true;\n" + + " String value = parts.get(parts.size() - 1);\n" + + " for (int i = 0; i < value.length(); i++) {\n" + + " if (!Character.isDigit(value.charAt(i))) {\n" + + " allNumeric = false;\n" + + " break;\n" + + " }\n" + + " }\n" + + " if (allNumeric) {\n" + + " highestRegistered = parts.get(parts.size() - 2) + '.' + parts.get(parts.size() - 1);\n" + + " } else {\n" + + " highestRegistered = parts.get(parts.size() - 1);\n" + + " }\n" + + "\n" + + " } else {\n" + + " highestRegistered = parts.get(parts.size() - 1);\n" + + " }\n" + + " }\n" + + " } else {\n" + + " /* HRD is the top private domain */\n" + + " highestRegistered = topPrivateDomain(normalizedHost, parts, publicSuffixIndex);\n" + + " }\n" + + " String subDomain = host.substring(0, host.length() - highestRegistered.length());\n" + + " if (subDomain.endsWith('.')) {\n" + + " subDomain = subDomain.substring(0, subDomain.length() - 1);\n" + + " }\n" + + " return [subDomain, highestRegistered];\n" + + "}\n"; + fn = fn.replace("\n",""); + function = fn; + } +} diff --git a/elasticsearch/src/main/resources/org/elasticsearch/xpack/ml/transforms/script.painless b/elasticsearch/src/main/resources/org/elasticsearch/xpack/ml/transforms/script.painless deleted file mode 100644 index 4b177138601..00000000000 --- a/elasticsearch/src/main/resources/org/elasticsearch/xpack/ml/transforms/script.painless +++ /dev/null @@ -1,187 +0,0 @@ -String replaceDots(String input) { - String output = input; - if (output.indexOf('。') >= 0) { - output = output.replace('。', '.'); - } - if (output.indexOf('.') >= 0) { - output = output.replace('.', '.'); - } - if (output.indexOf('。') >= 0) { - output = output.replace('。', '.'); - } - return output; -} -List split(String value) { - int nextWord = 0; - List splits = []; - for(int i = 0; i < value.length(); i++) { - if(value.charAt(i) == (char)'.') { - splits.add(value.substring(nextWord, i)); - nextWord = i+1; - } - } - if (nextWord != value.length()) { - splits.add(value.substring(nextWord, value.length())); - } - return splits; -} -List splitDomain(String domain) { - String dotDomain = replaceDots(domain); - return split(dotDomain); -} -boolean validateSyntax(List parts) { - int lastIndex = parts.length - 1; - /* Validate the last part specially, as it has different syntax rules. */ - if (!validatePart(parts[lastIndex], true)) { - return false; - } - for (int i = 0; i < lastIndex; i++) { - String part = parts[i]; - if (!validatePart(part, false)) { - return false; - } - } - return true; -} -boolean validatePart(String part, boolean isFinalPart) { - int MAX_DOMAIN_PART_LENGTH = 63; - if (part.length() < 1 || part.length() > MAX_DOMAIN_PART_LENGTH) { - return false; - } - int offset = 0; - int strLen = part.length(); - while (offset < strLen) { - int curChar = part.charAt(offset); - offset += 1; - if (!(Character.isLetterOrDigit(curChar) || curChar == (char)'-' || curChar == (char)'_')) { - return false; - } - } - if (part.charAt(0) == (char)'-' || part.charAt(0) == (char)'_' || - part.charAt(part.length() - 1) == (char)'-' || part.charAt(part.length() - 1) == (char)'_') { - return false; - } - if (isFinalPart && Character.isDigit(part.charAt(0))) { - return false; - } - return true; -} -int findPublicSuffix(Map params, List parts) { - int partsSize = parts.size(); - - for (int i = 0; i < partsSize; i++) { - StringJoiner joiner = new StringJoiner('.'); - for (String s : parts.subList(i, partsSize)) { - joiner.add(s); - } - /* parts.subList(i, partsSize).each(joiner::add); */ - String ancestorName = joiner.toString(); - - if (params['exact'].containsKey(ancestorName)) { - return i; - } - - /* Excluded domains (e.g. !nhs.uk) use the next highest - domain as the effective public suffix (e.g. uk). */ - - if (params['excluded'].containsKey(ancestorName)) { - return i + 1; - } - - List pieces = split(ancestorName); - if (pieces.length >= 2 && params['under'].containsKey(pieces[1])) { - return i; - } - } - - return -1; -} -String ancestor(List parts, int levels) { - StringJoiner joiner = new StringJoiner('.'); - for (String s : parts.subList(levels, parts.size())) { - joiner.add(s); - } - String name = joiner.toString(); - if (name.endsWith('.')) { - name = name.substring(0, name.length() - 1); - } - return name; -} -String topPrivateDomain(String name, List parts, int publicSuffixIndex) { - if (publicSuffixIndex == 1) { - return name; - } - if (!(publicSuffixIndex > 0)) { - throw new IllegalArgumentException('Not under a public suffix: ' + name); - } - return ancestor(parts, publicSuffixIndex - 1); -} -List domainSplit(String host, Map params) { - int MAX_DNS_NAME_LENGTH = 253; - int MAX_LENGTH = 253; - int MAX_PARTS = 127; - if ('host'.isEmpty()) { - return ['','']; - } - host = host.trim(); - if (host.contains(':')) { - return ['', host]; - } - boolean tentativeIP = true; - for(int i = 0; i < host.length(); i++) { - if (!(Character.isDigit(host.charAt(i)) || host.charAt(i) == (char)'.')) { - tentativeIP = false; - break; - } - } - if (tentativeIP) { - /* special-snowflake rules now... */ - if (host == '.') { - return ['','']; - } - return ['', host]; - } - def normalizedHost = host; - normalizedHost = normalizedHost.toLowerCase(); - List parts = splitDomain(normalizedHost); - int publicSuffixIndex = findPublicSuffix(params, parts); - if (publicSuffixIndex == 0) { - return ['', host]; - } - String highestRegistered = ''; - /* for the case where the host is internal like .local so is not a recognised public suffix */ - if (publicSuffixIndex == -1) { - if (!parts.isEmpty()) { - if (parts.size() == 1) { - return ['', host]; - } - if (parts.size() > 2) { - boolean allNumeric = true; - String value = parts.get(parts.size() - 1); - for (int i = 0; i < value.length(); i++) { - if (!Character.isDigit(value.charAt(i))) { - allNumeric = false; - break; - } - } - if (allNumeric) { - highestRegistered = parts.get(parts.size() - 2) + '.' + parts.get(parts.size() - 1); - } else { - highestRegistered = parts.get(parts.size() - 1); - } - - } else { - highestRegistered = parts.get(parts.size() - 1); - } - } - } else { - /* HRD is the top private domain */ - highestRegistered = topPrivateDomain(normalizedHost, parts, publicSuffixIndex); - } - String subDomain = host.substring(0, host.length() - highestRegistered.length()); - if (subDomain.endsWith('.')) { - subDomain = subDomain.substring(0, subDomain.length() - 1); - } - return [subDomain, highestRegistered]; -} - diff --git a/elasticsearch/src/test/java/org/elasticsearch/xpack/ml/datafeed/DatafeedConfigTests.java b/elasticsearch/src/test/java/org/elasticsearch/xpack/ml/datafeed/DatafeedConfigTests.java index 52abbcb1ca3..8f058fe9794 100644 --- a/elasticsearch/src/test/java/org/elasticsearch/xpack/ml/datafeed/DatafeedConfigTests.java +++ b/elasticsearch/src/test/java/org/elasticsearch/xpack/ml/datafeed/DatafeedConfigTests.java @@ -23,6 +23,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; public class DatafeedConfigTests extends AbstractSerializingTestCase { @@ -253,6 +254,30 @@ public class DatafeedConfigTests extends AbstractSerializingTestCase scriptFields = config.getScriptFields(); + + assertThat(scriptFields.size(), equalTo(2)); + assertThat(scriptFields.get(0).fieldName(), equalTo("script1")); + assertThat(scriptFields.get(0).script().getIdOrCode(), equalTo("return 1+1;")); + assertFalse(scriptFields.get(0).script().getParams().containsKey("exact")); + + assertThat(scriptFields.get(1).fieldName(), equalTo("script2")); + assertThat(scriptFields.get(1).script().getIdOrCode(), containsString("List domainSplit(String host, Map params)")); + assertTrue(scriptFields.get(1).script().getParams().containsKey("exact")); + } + public static String randomValidDatafeedId() { CodepointSetGenerator generator = new CodepointSetGenerator("abcdefghijklmnopqrstuvwxyz".toCharArray()); return generator.ofCodePointsLength(random(), 10, 10); diff --git a/qa/single-node-tests/src/test/java/org/elasticsearch/xpack/ml/transforms/PainlessDomainSplitIT.java b/qa/single-node-tests/src/test/java/org/elasticsearch/xpack/ml/transforms/PainlessDomainSplitIT.java index de828736c9c..86097b9d722 100644 --- a/qa/single-node-tests/src/test/java/org/elasticsearch/xpack/ml/transforms/PainlessDomainSplitIT.java +++ b/qa/single-node-tests/src/test/java/org/elasticsearch/xpack/ml/transforms/PainlessDomainSplitIT.java @@ -13,17 +13,17 @@ import org.elasticsearch.client.Response; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.test.StreamsUtils; import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xpack.ml.MlPlugin; +import org.elasticsearch.xpack.ml.utils.DomainSplitFunction; +import org.joda.time.DateTime; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.Enumeration; import java.util.HashMap; -import java.util.Locale; import java.util.Map; -import java.util.ResourceBundle; +import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -32,88 +32,6 @@ import static org.hamcrest.Matchers.equalTo; public class PainlessDomainSplitIT extends ESRestTestCase { - private static final Map EXCLUDED; - - static { - EXCLUDED = new HashMap<>(); - EXCLUDED.put("city.yokohama.jp", "i"); - EXCLUDED.put("teledata.mz", "i"); - EXCLUDED.put("city.kobe.jp", "i"); - EXCLUDED.put("city.sapporo.jp", "i"); - EXCLUDED.put("city.kawasaki.jp", "i"); - EXCLUDED.put("city.nagoya.jp", "i"); - EXCLUDED.put("www.ck", "i"); - EXCLUDED.put("city.sendai.jp", "i"); - EXCLUDED.put("city.kitakyushu.jp", "i"); - } - - private static final Map UNDER; - - static { - UNDER = new HashMap<>(); - UNDER.put("bd", "i"); - UNDER.put("np", "i"); - UNDER.put("jm", "i"); - UNDER.put("fj", "i"); - UNDER.put("fk", "i"); - UNDER.put("ye", "i"); - UNDER.put("sch.uk", "i"); - UNDER.put("bn", "i"); - UNDER.put("kitakyushu.jp", "i"); - UNDER.put("kobe.jp", "i"); - UNDER.put("ke", "i"); - UNDER.put("sapporo.jp", "i"); - UNDER.put("kh", "i"); - UNDER.put("mm", "i"); - UNDER.put("il", "i"); - UNDER.put("yokohama.jp", "i"); - UNDER.put("ck", "i"); - UNDER.put("nagoya.jp", "i"); - UNDER.put("sendai.jp", "i"); - UNDER.put("kw", "i"); - UNDER.put("er", "i"); - UNDER.put("mz", "i"); - UNDER.put("platform.sh", "p"); - UNDER.put("gu", "i"); - UNDER.put("nom.br", "i"); - UNDER.put("zm", "i"); - UNDER.put("pg", "i"); - UNDER.put("ni", "i"); - UNDER.put("kawasaki.jp", "i"); - UNDER.put("zw", "i"); - } - - public static final Map params; - - static { - params = new HashMap<>(); - - ResourceBundle resource = ResourceBundle.getBundle("org/elasticsearch/xpack/ml/transforms/exact", Locale.getDefault()); - Enumeration keys = resource.getKeys(); - Map exact = new HashMap<>(2048); - while (keys.hasMoreElements()) { - String key = (String) keys.nextElement(); - String value = resource.getString(key); - exact.put(key, value); - } - params.put("excluded", EXCLUDED); - params.put("under", UNDER); - params.put("exact", exact); - } - - static String domainSplitFunction; - - static { - - try { - domainSplitFunction = StreamsUtils.copyToStringFromClasspath("/org/elasticsearch/xpack/ml/transforms/script.painless") - .replace("\n", ""); - } catch (Exception e) { - domainSplitFunction = ""; - fail(e.toString()); - } - } - static class TestConfiguration { public String subDomainExpected; public String domainExpected; @@ -272,7 +190,12 @@ public class PainlessDomainSplitIT extends ESRestTestCase { new StringEntity("{ \"settings\": " + Strings.toString(settings) + " }"))); } - public void testBasics() throws Exception { + private void createIndex(String name, Settings settings, String mapping) throws IOException { + assertOK(client().performRequest("PUT", name, Collections.emptyMap(), + new StringEntity("{ \"settings\": " + Strings.toString(settings) + ", \"mappings\" : {" + mapping + "} }"))); + } + + public void testIsolated() throws Exception { Settings.Builder settings = Settings.builder() .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) @@ -284,6 +207,8 @@ public class PainlessDomainSplitIT extends ESRestTestCase { Pattern pattern = Pattern.compile("domain_split\":\\[(.*?),(.*?)\\]"); + Map params = new HashMap<>(DomainSplitFunction.params.size() + 1); + params.putAll(DomainSplitFunction.params); for (TestConfiguration testConfig : tests) { params.put("host", testConfig.hostName); String mapAsJson = new ObjectMapper().writeValueAsString(params); @@ -296,7 +221,7 @@ public class PainlessDomainSplitIT extends ESRestTestCase { " \"domain_split\" : {\n" + " \"script\" : {\n" + " \"lang\": \"painless\",\n" + - " \"inline\": \"" + domainSplitFunction + " return domainSplit(params['host'], params); \",\n" + + " \"inline\": \"" + DomainSplitFunction.function + " return domainSplit(params['host'], params); \",\n" + " \"params\": " + mapAsJson + "\n" + " }\n" + " }\n" + @@ -326,6 +251,117 @@ public class PainlessDomainSplitIT extends ESRestTestCase { assertThat("Expected domain [" + testConfig.domainExpected + "] but found [" + actualDomain + "]. Actual " + actualTotal + " vs Expected " + expectedTotal, actualDomain, equalTo(testConfig.domainExpected)); } + } + public void testHRDSplit() throws Exception { + + // Create job + String job = "{\n" + + " \"description\":\"Domain splitting\",\n" + + " \"analysis_config\" : {\n" + + " \"bucket_span\":3600,\n" + + " \"detectors\" :[{\"function\":\"count\", \"by_field_name\" : \"domain_split\"}]\n" + + " },\n" + + " \"data_description\" : {\n" + + " \"field_delimiter\":\",\",\n" + + " \"time_field\":\"time\"\n" + + " \n" + + " }\n" + + " }"; + + client().performRequest("PUT", MlPlugin.BASE_PATH + "anomaly_detectors/painless", Collections.emptyMap(), new StringEntity(job)); + client().performRequest("POST", MlPlugin.BASE_PATH + "anomaly_detectors/painless/_open"); + + // Create index to hold data + Settings.Builder settings = Settings.builder() + .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0); + + createIndex("painless", settings.build(), "\"test\": { \"properties\": { \"domain\": { \"type\": \"keyword\" },\"time\": { " + + "\"type\": \"date\" } } }"); + + // Index some data + DateTime baseTime = new DateTime().minusYears(1); + TestConfiguration test = tests.get(randomInt(tests.size()-1)); + Pattern pattern = Pattern.compile("by_field_value\":\"(" + test.subDomainExpected + "),(" + test.domainExpected +")\","); + for (int i = 0; i < 100; i++) { + + DateTime time = baseTime.plusHours(i); + if (i == 64) { + // Anomaly has 100 docs, but we don't care about the value + for (int j = 0; j < 100; j++) { + client().performRequest("PUT", "painless/test/" + time.toDateTimeISO() + "_" + j, + Collections.emptyMap(), + new StringEntity("{\"domain\": \"" + "bar.bar.com\", \"time\": \"" + time.toDateTimeISO() + "\"}")); + } + } else { + // Non-anomalous values will be what's seen when the anomaly is reported + client().performRequest("PUT", "painless/test/" + time.toDateTimeISO(), + Collections.emptyMap(), + new StringEntity("{\"domain\": \"" + test.hostName + "\", \"time\": \"" + time.toDateTimeISO() + "\"}")); + } + } + + client().performRequest("POST", "painless/_refresh"); + + // Create and start datafeed + String body = "{\n" + + " \"job_id\":\"painless\",\n" + + " \"indexes\":[\"painless\"],\n" + + " \"types\":[\"test\"],\n" + + " \"script_fields\": {\n" + + " \"domain_split\": {\n" + + " \"script\": \"return domainSplit(doc['domain'].value, params);\"\n" + + " }\n" + + " }\n" + + " }"; + + client().performRequest("PUT", MlPlugin.BASE_PATH + "datafeeds/painless", Collections.emptyMap(), new StringEntity(body)); + client().performRequest("POST", MlPlugin.BASE_PATH + "datafeeds/painless/_start"); + + boolean passed = awaitBusy(() -> { + try { + client().performRequest("POST", "/_refresh"); + + Response response = client().performRequest("GET", MlPlugin.BASE_PATH + "anomaly_detectors/painless/results/records"); + String responseBody = EntityUtils.toString(response.getEntity()); + + if (responseBody.contains("\"count\":2")) { + Matcher m = pattern.matcher(responseBody); + + String actualSubDomain = ""; + String actualDomain = ""; + if (m.find()) { + actualSubDomain = m.group(1).replace("\"", ""); + actualDomain = m.group(2).replace("\"", ""); + } + + String expectedTotal = "[" + test.subDomainExpected + "," + test.domainExpected + "]"; + String actualTotal = "[" + actualSubDomain + "," + actualDomain + "]"; + + // domainSplit() tests had subdomain, testHighestRegisteredDomainCases() do not + if (test.subDomainExpected != null) { + assertThat("Expected subdomain [" + test.subDomainExpected + "] but found [" + actualSubDomain + "]. Actual " + + actualTotal + " vs Expected " + expectedTotal, actualSubDomain, equalTo(test.subDomainExpected)); + } + + assertThat("Expected domain [" + test.domainExpected + "] but found [" + actualDomain + "]. Actual " + + actualTotal + " vs Expected " + expectedTotal, actualDomain, equalTo(test.domainExpected)); + + return true; + } else { + return false; + } + + } catch (Exception e) { + logger.error(e.getMessage()); + return false; + } + + }, 5, TimeUnit.SECONDS); + + if (!passed) { + fail("Anomaly records were not found within 5 seconds"); + } } }