From e21692e3870a9e4437f9884628bfb90ca19fe3dd Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Fri, 20 Jul 2018 21:10:35 +0200 Subject: [PATCH] INGEST: Make a few Processors callable by Painless (#32170) * INGEST: Make a few Processors callable by Painless * Extracted a few stateless String processors as well as the json processor to static methods and whitelisted them in Painless * provide whitelist from processors plugin --- modules/ingest-common/build.gradle | 6 + .../ingest/common/BytesProcessor.java | 6 +- .../ingest/common/JsonProcessor.java | 38 +-- .../ingest/common/LowercaseProcessor.java | 6 +- .../ingest/common/Processors.java | 49 ++++ .../common/ProcessorsWhitelistExtension.java | 41 ++++ .../ingest/common/URLDecodeProcessor.java | 10 +- .../ingest/common/UppercaseProcessor.java | 6 +- ...asticsearch.painless.spi.PainlessExtension | 1 + .../ingest/common/processors_whitelist.txt | 29 +++ .../ingest/common/BytesProcessorTests.java | 4 +- .../test/ingest/190_script_processor.yml | 216 ++++++++++++++++++ .../painless/spi/org.elasticsearch.txt | 2 +- 13 files changed, 391 insertions(+), 23 deletions(-) create mode 100644 modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/Processors.java create mode 100644 modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/ProcessorsWhitelistExtension.java create mode 100644 modules/ingest-common/src/main/resources/META-INF/services/org.elasticsearch.painless.spi.PainlessExtension create mode 100644 modules/ingest-common/src/main/resources/org/elasticsearch/ingest/common/processors_whitelist.txt create mode 100644 modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/190_script_processor.yml diff --git a/modules/ingest-common/build.gradle b/modules/ingest-common/build.gradle index 424c1197da3..4f35bbee28d 100644 --- a/modules/ingest-common/build.gradle +++ b/modules/ingest-common/build.gradle @@ -20,11 +20,17 @@ esplugin { description 'Module for ingest processors that do not require additional security permissions or have large dependencies and resources' classname 'org.elasticsearch.ingest.common.IngestCommonPlugin' + extendedPlugins = ['lang-painless'] } dependencies { + compileOnly project(':modules:lang-painless') compile project(':libs:grok') } compileJava.options.compilerArgs << "-Xlint:-unchecked,-rawtypes" compileTestJava.options.compilerArgs << "-Xlint:-unchecked,-rawtypes" + +integTestCluster { + module project(':modules:lang-painless') +} \ No newline at end of file diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/BytesProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/BytesProcessor.java index dfe9a054acf..d07b56e1b3d 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/BytesProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/BytesProcessor.java @@ -35,9 +35,13 @@ public final class BytesProcessor extends AbstractStringProcessor { super(processorTag, field, ignoreMissing, targetField); } + public static long apply(String value) { + return ByteSizeValue.parseBytesSizeValue(value, null, "Ingest Field").getBytes(); + } + @Override protected Long process(String value) { - return ByteSizeValue.parseBytesSizeValue(value, null, getField()).getBytes(); + return apply(value); } @Override diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/JsonProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/JsonProcessor.java index 2f217735df2..c0a9d37abda 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/JsonProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/JsonProcessor.java @@ -67,13 +67,11 @@ public final class JsonProcessor extends AbstractProcessor { return addToRoot; } - @Override - public void execute(IngestDocument document) throws Exception { - Object fieldValue = document.getFieldValue(field, Object.class); - BytesReference bytesRef = (fieldValue == null) ? new BytesArray("null") : new BytesArray(fieldValue.toString()); + public static Object apply(Object fieldValue) { + BytesReference bytesRef = fieldValue == null ? new BytesArray("null") : new BytesArray(fieldValue.toString()); try (InputStream stream = bytesRef.streamInput(); XContentParser parser = JsonXContent.jsonXContent - .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, stream)) { + .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, stream)) { XContentParser.Token token = parser.nextToken(); Object value = null; if (token == XContentParser.Token.VALUE_NULL) { @@ -91,20 +89,32 @@ public final class JsonProcessor extends AbstractProcessor { } else if (token == XContentParser.Token.VALUE_EMBEDDED_OBJECT) { throw new IllegalArgumentException("cannot read binary value"); } - if (addToRoot && (value instanceof Map)) { - for (Map.Entry entry : ((Map) value).entrySet()) { - document.setFieldValue(entry.getKey(), entry.getValue()); - } - } else if (addToRoot) { - throw new IllegalArgumentException("cannot add non-map fields to root of document"); - } else { - document.setFieldValue(targetField, value); - } + return value; } catch (IOException e) { throw new IllegalArgumentException(e); } } + public static void apply(Map ctx, String fieldName) { + Object value = apply(ctx.get(fieldName)); + if (value instanceof Map) { + @SuppressWarnings("unchecked") + Map map = (Map) value; + ctx.putAll(map); + } else { + throw new IllegalArgumentException("cannot add non-map fields to root of document"); + } + } + + @Override + public void execute(IngestDocument document) throws Exception { + if (addToRoot) { + apply(document.getSourceAndMetadata(), field); + } else { + document.setFieldValue(targetField, apply(document.getFieldValue(field, Object.class))); + } + } + @Override public String getType() { return TYPE; diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/LowercaseProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/LowercaseProcessor.java index aef8b0cce24..4269cb05257 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/LowercaseProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/LowercaseProcessor.java @@ -35,9 +35,13 @@ public final class LowercaseProcessor extends AbstractStringProcessor { super(processorTag, field, ignoreMissing, targetField); } + public static String apply(String value) { + return value.toLowerCase(Locale.ROOT); + } + @Override protected String process(String value) { - return value.toLowerCase(Locale.ROOT); + return apply(value); } @Override diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/Processors.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/Processors.java new file mode 100644 index 00000000000..8a0b1529892 --- /dev/null +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/Processors.java @@ -0,0 +1,49 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.ingest.common; + +import java.util.Map; + +public final class Processors { + + public static long bytes(String value) { + return BytesProcessor.apply(value); + } + + public static String lowercase(String value) { + return LowercaseProcessor.apply(value); + } + + public static String uppercase(String value) { + return UppercaseProcessor.apply(value); + } + + public static Object json(Object fieldValue) { + return JsonProcessor.apply(fieldValue); + } + + public static void json(Map ctx, String field) { + JsonProcessor.apply(ctx, field); + } + + public static String urlDecode(String value) { + return URLDecodeProcessor.apply(value); + } +} diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/ProcessorsWhitelistExtension.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/ProcessorsWhitelistExtension.java new file mode 100644 index 00000000000..ced84057c7a --- /dev/null +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/ProcessorsWhitelistExtension.java @@ -0,0 +1,41 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.ingest.common; + +import org.elasticsearch.painless.spi.PainlessExtension; +import org.elasticsearch.painless.spi.Whitelist; +import org.elasticsearch.painless.spi.WhitelistLoader; +import org.elasticsearch.script.IngestScript; +import org.elasticsearch.script.ScriptContext; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class ProcessorsWhitelistExtension implements PainlessExtension { + + private static final Whitelist WHITELIST = + WhitelistLoader.loadFromResourceFiles(ProcessorsWhitelistExtension.class, "processors_whitelist.txt"); + + @Override + public Map, List> getContextWhitelists() { + return Collections.singletonMap(IngestScript.CONTEXT, Collections.singletonList(WHITELIST)); + } +} diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/URLDecodeProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/URLDecodeProcessor.java index 945419499ad..fb6c5acf98b 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/URLDecodeProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/URLDecodeProcessor.java @@ -34,15 +34,19 @@ public final class URLDecodeProcessor extends AbstractStringProcessor { super(processorTag, field, ignoreMissing, targetField); } - @Override - protected String process(String value) { + public static String apply(String value) { try { return URLDecoder.decode(value, "UTF-8"); } catch (UnsupportedEncodingException e) { - throw new IllegalArgumentException("could not URL-decode field[" + getField() + "]", e); + throw new IllegalArgumentException("Could not URL-decode value.", e); } } + @Override + protected String process(String value) { + return apply(value); + } + @Override public String getType() { return TYPE; diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/UppercaseProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/UppercaseProcessor.java index af93f06a8f2..6c428627c7d 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/UppercaseProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/UppercaseProcessor.java @@ -34,9 +34,13 @@ public final class UppercaseProcessor extends AbstractStringProcessor { super(processorTag, field, ignoreMissing, targetField); } + public static String apply(String value) { + return value.toUpperCase(Locale.ROOT); + } + @Override protected String process(String value) { - return value.toUpperCase(Locale.ROOT); + return apply(value); } @Override diff --git a/modules/ingest-common/src/main/resources/META-INF/services/org.elasticsearch.painless.spi.PainlessExtension b/modules/ingest-common/src/main/resources/META-INF/services/org.elasticsearch.painless.spi.PainlessExtension new file mode 100644 index 00000000000..8a98f034be5 --- /dev/null +++ b/modules/ingest-common/src/main/resources/META-INF/services/org.elasticsearch.painless.spi.PainlessExtension @@ -0,0 +1 @@ +org.elasticsearch.ingest.common.ProcessorsWhitelistExtension \ No newline at end of file diff --git a/modules/ingest-common/src/main/resources/org/elasticsearch/ingest/common/processors_whitelist.txt b/modules/ingest-common/src/main/resources/org/elasticsearch/ingest/common/processors_whitelist.txt new file mode 100644 index 00000000000..3d93b19f066 --- /dev/null +++ b/modules/ingest-common/src/main/resources/org/elasticsearch/ingest/common/processors_whitelist.txt @@ -0,0 +1,29 @@ +# +# Licensed to Elasticsearch under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# This file contains a whitelist of static processor methods that can be accessed from painless + +class org.elasticsearch.ingest.common.Processors { + long bytes(String) + String lowercase(String) + String uppercase(String) + Object json(Object) + void json(Map, String) + String urlDecode(String) +} \ No newline at end of file diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/BytesProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/BytesProcessorTests.java index 0da3434adf1..788340a455a 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/BytesProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/BytesProcessorTests.java @@ -63,7 +63,7 @@ public class BytesProcessorTests extends AbstractStringProcessorTestCase { Processor processor = newProcessor(fieldName, randomBoolean(), fieldName); ElasticsearchException exception = expectThrows(ElasticsearchException.class, () -> processor.execute(ingestDocument)); assertThat(exception.getMessage(), - CoreMatchers.equalTo("failed to parse setting [" + fieldName + "] with value [8912pb] as a size in bytes")); + CoreMatchers.equalTo("failed to parse setting [Ingest Field] with value [8912pb] as a size in bytes")); assertThat(exception.getCause().getMessage(), CoreMatchers.containsString("Values greater than 9223372036854775807 bytes are not supported")); } @@ -93,6 +93,6 @@ public class BytesProcessorTests extends AbstractStringProcessorTestCase { processor.execute(ingestDocument); assertThat(ingestDocument.getFieldValue(fieldName, expectedResultType()), equalTo(1126L)); assertWarnings("Fractional bytes values are deprecated. Use non-fractional bytes values instead: [1.1kb] found for setting " + - "[" + fieldName + "]"); + "[Ingest Field]"); } } diff --git a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/190_script_processor.yml b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/190_script_processor.yml new file mode 100644 index 00000000000..bd55b764a95 --- /dev/null +++ b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/190_script_processor.yml @@ -0,0 +1,216 @@ +--- +teardown: + - do: + ingest.delete_pipeline: + id: "my_pipeline" + ignore: 404 + +--- +"Test invoke bytes processor": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "script" : { + "lang": "painless", + "source" : "ctx.target_field = Processors.bytes(ctx.source_field)" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + type: test + id: 1 + pipeline: "my_pipeline" + body: {source_field: "1kb"} + + - do: + get: + index: test + type: test + id: 1 + - match: { _source.source_field: "1kb" } + - match: { _source.target_field: 1024 } + +--- +"Test invoke lowercase processor": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "script" : { + "lang": "painless", + "source" : "ctx.target_field = Processors.lowercase(ctx.source_field)" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + type: test + id: 1 + pipeline: "my_pipeline" + body: {source_field: "FooBar"} + + - do: + get: + index: test + type: test + id: 1 + - match: { _source.source_field: "FooBar" } + - match: { _source.target_field: "foobar" } + +--- +"Test invoke uppercase processor": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "script" : { + "lang": "painless", + "source" : "ctx.target_field = Processors.uppercase(ctx.source_field)" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + type: test + id: 1 + pipeline: "my_pipeline" + body: {source_field: "FooBar"} + + - do: + get: + index: test + type: test + id: 1 + - match: { _source.source_field: "FooBar" } + - match: { _source.target_field: "FOOBAR" } + +--- +"Test invoke json processor, assign to field": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "script" : { + "lang": "painless", + "source" : "ctx.target_field = Processors.json(ctx.source_field)" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + type: test + id: 1 + pipeline: "my_pipeline" + body: {source_field: "{\"foo\":\"bar\"}"} + + - do: + get: + index: test + type: test + id: 1 + - match: { _source.source_field: "{\"foo\":\"bar\"}" } + - match: { _source.target_field.foo: "bar" } + +--- +"Test invoke json processor, assign to root": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "script" : { + "lang": "painless", + "source" : "Processors.json(ctx, 'source_field')" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + type: test + id: 1 + pipeline: "my_pipeline" + body: {source_field: "{\"foo\":\"bar\"}"} + + - do: + get: + index: test + type: test + id: 1 + - match: { _source.source_field: "{\"foo\":\"bar\"}" } + - match: { _source.foo: "bar" } + +--- +"Test invoke urlDecode processor": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "script" : { + "lang": "painless", + "source" : "ctx.target_field = Processors.urlDecode(ctx.source_field)" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + type: test + id: 1 + pipeline: "my_pipeline" + body: {source_field: "foo%20bar"} + + - do: + get: + index: test + type: test + id: 1 + - match: { _source.source_field: "foo%20bar" } + - match: { _source.target_field: "foo bar" } diff --git a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/spi/org.elasticsearch.txt b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/spi/org.elasticsearch.txt index 6495659d9cd..8491d15c27e 100644 --- a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/spi/org.elasticsearch.txt +++ b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/spi/org.elasticsearch.txt @@ -174,4 +174,4 @@ class org.elasticsearch.index.similarity.ScriptedSimilarity$Term { class org.elasticsearch.index.similarity.ScriptedSimilarity$Doc { int getLength() float getFreq() -} +} \ No newline at end of file