Support array for all string ingest processors (#53694)

This commit is contained in:
Dan Hermann 2020-03-18 07:07:49 -05:00 committed by GitHub
parent 2384c1359d
commit 94ac979c66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 66 additions and 10 deletions

View File

@ -1,6 +1,6 @@
[[bytes-processor]]
=== Bytes Processor
Converts a human readable byte value (e.g. 1kb) to its value in bytes (e.g. 1024).
Converts a human readable byte value (e.g. 1kb) to its value in bytes (e.g. 1024). If the field is an array of strings, all members of the array will be converted.
Supported human readable units are "b", "kb", "mb", "gb", "tb", "pb" case insensitive. An error will occur if
the field is not in a supported format or the resultant value exceeds 2^63.

View File

@ -1,7 +1,7 @@
[[gsub-processor]]
=== Gsub Processor
Converts a string field by applying a regular expression and a replacement.
If the field is not a string, the processor will throw an exception.
If the field is an array of strings, all members of the array will be converted. If any non-string values are encountered, the processor will throw an exception.
[[gsub-options]]
.Gsub Options

View File

@ -1,6 +1,6 @@
[[htmlstrip-processor]]
=== HTML Strip Processor
Removes HTML from field.
Removes HTML tags from the field. If the field is an array of strings, HTML tags will be removed from all members of the array.
NOTE: Each HTML tag is replaced with a `\n` character.

View File

@ -1,6 +1,6 @@
[[lowercase-processor]]
=== Lowercase Processor
Converts a string to its lowercase equivalent.
Converts a string to its lowercase equivalent. If the field is an array of strings, all members of the array will be converted.
[[lowercase-options]]
.Lowercase Options

View File

@ -1,6 +1,6 @@
[[trim-processor]]
=== Trim Processor
Trims whitespace from field.
Trims whitespace from the field. If the field is an array of strings, all members of the array will be trimmed.
NOTE: This only works on leading and trailing whitespace.

View File

@ -1,6 +1,6 @@
[[uppercase-processor]]
=== Uppercase Processor
Converts a string to its uppercase equivalent.
Converts a string to its uppercase equivalent. If the field is an array of strings, all members of the array will be converted.
[[uppercase-options]]
.Uppercase Options

View File

@ -1,6 +1,6 @@
[[urldecode-processor]]
=== URL Decode Processor
URL-decodes a string
URL-decodes a string. If the field is an array of strings, all members of the array will be decoded.
[[urldecode-options]]
.URL Decode Options

View File

@ -24,6 +24,8 @@ import org.elasticsearch.ingest.ConfigurationUtils;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
@ -58,7 +60,8 @@ abstract class AbstractStringProcessor<T> extends AbstractProcessor {
@Override
public final IngestDocument execute(IngestDocument document) {
String val = document.getFieldValue(field, String.class, ignoreMissing);
Object val = document.getFieldValue(field, Object.class, ignoreMissing);
Object newValue;
if (val == null && ignoreMissing) {
return document;
@ -66,7 +69,29 @@ abstract class AbstractStringProcessor<T> extends AbstractProcessor {
throw new IllegalArgumentException("field [" + field + "] is null, cannot process it.");
}
document.setFieldValue(targetField, process(val));
if (val instanceof List) {
List<?> list = (List<?>) val;
List<Object> newList = new ArrayList<>(list.size());
for (Object value : list) {
if (value instanceof String) {
newList.add(process((String) value));
} else {
throw new IllegalArgumentException("value [" + value + "] of type [" + value.getClass().getName() +
"] in list field [" + field + "] cannot be cast to [" + String.class.getName() + "]");
}
}
newValue = newList;
} else {
if (val instanceof String) {
newValue = process((String) val);
} else {
throw new IllegalArgumentException("field [" + field + "] of type [" + val.getClass().getName() + "] cannot be cast to [" +
String.class.getName() + "]");
}
}
document.setFieldValue(targetField, newValue);
return document;
}

View File

@ -24,8 +24,10 @@ import org.elasticsearch.ingest.Processor;
import org.elasticsearch.ingest.RandomDocumentPicks;
import org.elasticsearch.test.ESTestCase;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument;
import static org.hamcrest.Matchers.containsString;
@ -41,7 +43,7 @@ public abstract class AbstractStringProcessorTestCase<T> extends ESTestCase {
protected abstract T expectedResult(String input);
protected Class<?> expectedResultType(){
protected Class<?> expectedResultType() {
return String.class; // most results types are Strings
}
@ -52,6 +54,19 @@ public abstract class AbstractStringProcessorTestCase<T> extends ESTestCase {
Processor processor = newProcessor(fieldName, randomBoolean(), fieldName);
processor.execute(ingestDocument);
assertThat(ingestDocument.getFieldValue(fieldName, expectedResultType()), equalTo(expectedResult(fieldValue)));
int numItems = randomIntBetween(1, 10);
List<String> fieldValueList = new ArrayList<>();
List<T> expectedList = new ArrayList<>();
for (int i = 0; i < numItems; i++) {
String randomString = RandomDocumentPicks.randomString(random());
fieldValueList.add(modifyInput(randomString));
expectedList.add(expectedResult(randomString));
}
String multiValueFieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, fieldValueList);
Processor multiValueProcessor = newProcessor(multiValueFieldName, randomBoolean(), multiValueFieldName);
multiValueProcessor.execute(ingestDocument);
assertThat(ingestDocument.getFieldValue(multiValueFieldName, List.class), equalTo(expectedList));
}
public void testFieldNotFound() throws Exception {
@ -94,6 +109,14 @@ public abstract class AbstractStringProcessorTestCase<T> extends ESTestCase {
Exception e = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
assertThat(e.getMessage(), equalTo("field [" + fieldName +
"] of type [java.lang.Integer] cannot be cast to [java.lang.String]"));
List<Integer> fieldValueList = new ArrayList<>();
int randomValue = randomInt();
fieldValueList.add(randomValue);
ingestDocument.setFieldValue(fieldName, fieldValueList);
Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
assertThat(exception.getMessage(), equalTo("value [" + randomValue + "] of type [java.lang.Integer] in list field [" + fieldName +
"] cannot be cast to [java.lang.String]"));
}
public void testNonStringValueWithIgnoreMissing() throws Exception {
@ -104,6 +127,14 @@ public abstract class AbstractStringProcessorTestCase<T> extends ESTestCase {
Exception e = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
assertThat(e.getMessage(), equalTo("field [" + fieldName +
"] of type [java.lang.Integer] cannot be cast to [java.lang.String]"));
List<Integer> fieldValueList = new ArrayList<>();
int randomValue = randomInt();
fieldValueList.add(randomValue);
ingestDocument.setFieldValue(fieldName, fieldValueList);
Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument));
assertThat(exception.getMessage(), equalTo("value [" + randomValue + "] of type [java.lang.Integer] in list field [" + fieldName +
"] cannot be cast to [java.lang.String]"));
}
public void testTargetField() throws Exception {