mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-25 09:28:27 +00:00
Add option to split processor for preserving trailing empty fields (#48685)
This commit is contained in:
parent
77acbc4fa9
commit
dbc05cd808
@ -11,6 +11,7 @@ Splits a field into an array using a separator character. Only works on string f
|
||||
| `separator` | yes | - | A regex which matches the separator, e.g. `,` or `\s+`
|
||||
| `target_field` | no | `field` | The field to assign the split value to, by default `field` is updated in-place
|
||||
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
|
||||
| `preserve_trailing` | no | `false` | If `true`, any trailing empty fields in the input are preserved instead of being discarded
|
||||
include::common-options.asciidoc[]
|
||||
|======
|
||||
|
||||
@ -25,3 +26,20 @@ include::common-options.asciidoc[]
|
||||
--------------------------------------------------
|
||||
// NOTCONSOLE
|
||||
<1> Treat all consecutive whitespace characters as a single separator
|
||||
|
||||
If the `preserve_trailing` option is enabled, any trailing empty fields in the input will be preserved. For example,
|
||||
in the configuration below, a value of `A,,B,,` in the `my_field` property will be split into an array of five elements
|
||||
`["A", "", "B", "", ""]` with two empty trailing fields. If the `preserve_trailing` property were not enabled, the two
|
||||
empty trailing fields would be discarded resulting in the three-element array `["A", "", "B"]`.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"split": {
|
||||
"field": "my_field",
|
||||
"separator": ",",
|
||||
"preserve_trailing": true
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// NOTCONSOLE
|
||||
|
@ -41,13 +41,15 @@ public final class SplitProcessor extends AbstractProcessor {
|
||||
private final String field;
|
||||
private final String separator;
|
||||
private final boolean ignoreMissing;
|
||||
private final boolean preserveTrailing;
|
||||
private final String targetField;
|
||||
|
||||
SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, String targetField) {
|
||||
SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, boolean preserveTrailing, String targetField) {
|
||||
super(tag);
|
||||
this.field = field;
|
||||
this.separator = separator;
|
||||
this.ignoreMissing = ignoreMissing;
|
||||
this.preserveTrailing = preserveTrailing;
|
||||
this.targetField = targetField;
|
||||
}
|
||||
|
||||
@ -63,6 +65,8 @@ public final class SplitProcessor extends AbstractProcessor {
|
||||
return ignoreMissing;
|
||||
}
|
||||
|
||||
/**
 * Reports whether this processor was configured to keep trailing empty strings
 * when splitting (the value passed to the constructor as {@code preserveTrailing}).
 */
boolean isPreserveTrailing() {
    return preserveTrailing;
}
|
||||
|
||||
// Returns the name of the field that the split result is written to.
String getTargetField() {
|
||||
return targetField;
|
||||
}
|
||||
@ -77,7 +81,7 @@ public final class SplitProcessor extends AbstractProcessor {
|
||||
throw new IllegalArgumentException("field [" + field + "] is null, cannot split.");
|
||||
}
|
||||
|
||||
String[] strings = oldVal.split(separator);
|
||||
String[] strings = oldVal.split(separator, preserveTrailing ? -1 : 0);
|
||||
List<String> splitList = new ArrayList<>(strings.length);
|
||||
Collections.addAll(splitList, strings);
|
||||
document.setFieldValue(targetField, splitList);
|
||||
@ -95,9 +99,10 @@ public final class SplitProcessor extends AbstractProcessor {
|
||||
Map<String, Object> config) throws Exception {
|
||||
String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
|
||||
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
|
||||
boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "preserve_trailing", false);
|
||||
String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", field);
|
||||
return new SplitProcessor(processorTag, field,
|
||||
ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator"), ignoreMissing, targetField);
|
||||
String separator = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator");
|
||||
return new SplitProcessor(processorTag, field, separator, ignoreMissing, preserveTrailing, targetField);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -79,6 +79,24 @@ public class SplitProcessorFactoryTests extends ESTestCase {
|
||||
assertThat(splitProcessor.getField(), equalTo("field1"));
|
||||
assertThat(splitProcessor.getSeparator(), equalTo("\\."));
|
||||
assertFalse(splitProcessor.isIgnoreMissing());
|
||||
assertFalse(splitProcessor.isPreserveTrailing());
|
||||
assertThat(splitProcessor.getTargetField(), equalTo("target"));
|
||||
}
|
||||
|
||||
public void testCreateWithPreserveTrailing() throws Exception {
|
||||
SplitProcessor.Factory factory = new SplitProcessor.Factory();
|
||||
Map<String, Object> config = new HashMap<>();
|
||||
config.put("field", "field1");
|
||||
config.put("separator", "\\.");
|
||||
config.put("target_field", "target");
|
||||
config.put("preserve_trailing", true);
|
||||
String processorTag = randomAlphaOfLength(10);
|
||||
SplitProcessor splitProcessor = factory.create(null, processorTag, config);
|
||||
assertThat(splitProcessor.getTag(), equalTo(processorTag));
|
||||
assertThat(splitProcessor.getField(), equalTo("field1"));
|
||||
assertThat(splitProcessor.getSeparator(), equalTo("\\."));
|
||||
assertFalse(splitProcessor.isIgnoreMissing());
|
||||
assertThat(splitProcessor.getTargetField(), equalTo("target"));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ public class SplitProcessorTests extends ESTestCase {
|
||||
public void testSplit() throws Exception {
|
||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
|
||||
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
|
||||
processor.execute(ingestDocument);
|
||||
assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
|
||||
}
|
||||
@ -47,7 +47,7 @@ public class SplitProcessorTests extends ESTestCase {
|
||||
public void testSplitFieldNotFound() throws Exception {
|
||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
|
||||
String fieldName = RandomDocumentPicks.randomFieldName(random());
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
|
||||
try {
|
||||
processor.execute(ingestDocument);
|
||||
fail("split processor should have failed");
|
||||
@ -59,7 +59,7 @@ public class SplitProcessorTests extends ESTestCase {
|
||||
public void testSplitNullValue() throws Exception {
|
||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(),
|
||||
Collections.singletonMap("field", null));
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, "field");
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, false, "field");
|
||||
try {
|
||||
processor.execute(ingestDocument);
|
||||
fail("split processor should have failed");
|
||||
@ -73,7 +73,7 @@ public class SplitProcessorTests extends ESTestCase {
|
||||
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
|
||||
Collections.singletonMap(fieldName, null));
|
||||
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, fieldName);
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, false, fieldName);
|
||||
processor.execute(ingestDocument);
|
||||
assertIngestDocument(originalIngestDocument, ingestDocument);
|
||||
}
|
||||
@ -81,7 +81,7 @@ public class SplitProcessorTests extends ESTestCase {
|
||||
public void testSplitNonExistentWithIgnoreMissing() throws Exception {
|
||||
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
|
||||
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, "field");
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, false, "field");
|
||||
processor.execute(ingestDocument);
|
||||
assertIngestDocument(originalIngestDocument, ingestDocument);
|
||||
}
|
||||
@ -90,7 +90,7 @@ public class SplitProcessorTests extends ESTestCase {
|
||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
|
||||
String fieldName = RandomDocumentPicks.randomFieldName(random());
|
||||
ingestDocument.setFieldValue(fieldName, randomInt());
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
|
||||
try {
|
||||
processor.execute(ingestDocument);
|
||||
fail("split processor should have failed");
|
||||
@ -121,8 +121,24 @@ public class SplitProcessorTests extends ESTestCase {
|
||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
|
||||
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
|
||||
String targetFieldName = fieldName + randomAlphaOfLength(5);
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, targetFieldName);
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, targetFieldName);
|
||||
processor.execute(ingestDocument);
|
||||
assertThat(ingestDocument.getFieldValue(targetFieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
|
||||
}
|
||||
|
||||
// With preserve_trailing enabled, the two empty strings after the last separators must be kept.
public void testSplitWithPreserveTrailing() throws Exception {
|
||||
doTestSplitWithPreserveTrailing(true, "foo|bar|baz||", Arrays.asList("foo", "bar", "baz", "", ""));
|
||||
}
|
||||
|
||||
// With preserve_trailing disabled, the trailing empty strings must be dropped from the result.
public void testSplitWithoutPreserveTrailing() throws Exception {
|
||||
doTestSplitWithPreserveTrailing(false, "foo|bar|baz||", Arrays.asList("foo", "bar", "baz"));
|
||||
}
|
||||
|
||||
private void doTestSplitWithPreserveTrailing(boolean preserveTrailing, String fieldValue, List<String> expected) throws Exception {
|
||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
|
||||
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, fieldValue);
|
||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\|", false, preserveTrailing, fieldName);
|
||||
processor.execute(ingestDocument);
|
||||
assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(expected));
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user