mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-30 20:08:29 +00:00
Add option to split processor for preserving trailing empty fields (#48685)
This commit is contained in:
parent
77acbc4fa9
commit
dbc05cd808
@ -11,6 +11,7 @@ Splits a field into an array using a separator character. Only works on string f
|
|||||||
| `separator` | yes | - | A regex which matches the separator, e.g. `,` or `\s+`
|
| `separator` | yes | - | A regex which matches the separator, e.g. `,` or `\s+`
|
||||||
| `target_field` | no | `field` | The field to assign the split value to, by default `field` is updated in-place
|
| `target_field` | no | `field` | The field to assign the split value to, by default `field` is updated in-place
|
||||||
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
|
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
|
||||||
|
| `preserve_trailing` | no | `false` | If `true`, empty trailing fields in the input are preserved rather than discarded
|
||||||
include::common-options.asciidoc[]
|
include::common-options.asciidoc[]
|
||||||
|======
|
|======
|
||||||
|
|
||||||
@ -25,3 +26,20 @@ include::common-options.asciidoc[]
|
|||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
// NOTCONSOLE
|
// NOTCONSOLE
|
||||||
<1> Treat all consecutive whitespace characters as a single separator
|
<1> Treat all consecutive whitespace characters as a single separator
|
||||||
|
|
||||||
|
If the `preserve_trailing` option is enabled, any trailing empty fields in the input will be preserved. For example,
|
||||||
|
in the configuration below, a value of `A,,B,,` in the `my_field` property will be split into an array of five elements
|
||||||
|
`["A", "", "B", "", ""]` with two empty trailing fields. If the `preserve_trailing` property were not enabled, the two
|
||||||
|
empty trailing fields would be discarded, resulting in the three-element array `["A", "", "B"]`.
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------------------------------
|
||||||
|
{
|
||||||
|
"split": {
|
||||||
|
"field": "my_field",
|
||||||
|
"separator": ",",
|
||||||
|
"preserve_trailing": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------------------------------
|
||||||
|
// NOTCONSOLE
|
||||||
|
@ -41,13 +41,15 @@ public final class SplitProcessor extends AbstractProcessor {
|
|||||||
private final String field;
|
private final String field;
|
||||||
private final String separator;
|
private final String separator;
|
||||||
private final boolean ignoreMissing;
|
private final boolean ignoreMissing;
|
||||||
|
private final boolean preserveTrailing;
|
||||||
private final String targetField;
|
private final String targetField;
|
||||||
|
|
||||||
SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, String targetField) {
|
SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, boolean preserveTrailing, String targetField) {
|
||||||
super(tag);
|
super(tag);
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.separator = separator;
|
this.separator = separator;
|
||||||
this.ignoreMissing = ignoreMissing;
|
this.ignoreMissing = ignoreMissing;
|
||||||
|
this.preserveTrailing = preserveTrailing;
|
||||||
this.targetField = targetField;
|
this.targetField = targetField;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -63,6 +65,8 @@ public final class SplitProcessor extends AbstractProcessor {
|
|||||||
return ignoreMissing;
|
return ignoreMissing;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the {@code preserveTrailing} flag this processor was constructed with.
 * When it is {@code true} the field value is split with a limit of {@code -1},
 * otherwise with a limit of {@code 0}.
 */
boolean isPreserveTrailing() {
    return preserveTrailing;
}
|
||||||
|
|
||||||
String getTargetField() {
|
String getTargetField() {
|
||||||
return targetField;
|
return targetField;
|
||||||
}
|
}
|
||||||
@ -77,7 +81,7 @@ public final class SplitProcessor extends AbstractProcessor {
|
|||||||
throw new IllegalArgumentException("field [" + field + "] is null, cannot split.");
|
throw new IllegalArgumentException("field [" + field + "] is null, cannot split.");
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] strings = oldVal.split(separator);
|
String[] strings = oldVal.split(separator, preserveTrailing ? -1 : 0);
|
||||||
List<String> splitList = new ArrayList<>(strings.length);
|
List<String> splitList = new ArrayList<>(strings.length);
|
||||||
Collections.addAll(splitList, strings);
|
Collections.addAll(splitList, strings);
|
||||||
document.setFieldValue(targetField, splitList);
|
document.setFieldValue(targetField, splitList);
|
||||||
@ -95,9 +99,10 @@ public final class SplitProcessor extends AbstractProcessor {
|
|||||||
Map<String, Object> config) throws Exception {
|
Map<String, Object> config) throws Exception {
|
||||||
String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
|
String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
|
||||||
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
|
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
|
||||||
|
boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "preserve_trailing", false);
|
||||||
String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", field);
|
String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", field);
|
||||||
return new SplitProcessor(processorTag, field,
|
String separator = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator");
|
||||||
ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator"), ignoreMissing, targetField);
|
return new SplitProcessor(processorTag, field, separator, ignoreMissing, preserveTrailing, targetField);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -79,6 +79,24 @@ public class SplitProcessorFactoryTests extends ESTestCase {
|
|||||||
assertThat(splitProcessor.getField(), equalTo("field1"));
|
assertThat(splitProcessor.getField(), equalTo("field1"));
|
||||||
assertThat(splitProcessor.getSeparator(), equalTo("\\."));
|
assertThat(splitProcessor.getSeparator(), equalTo("\\."));
|
||||||
assertFalse(splitProcessor.isIgnoreMissing());
|
assertFalse(splitProcessor.isIgnoreMissing());
|
||||||
|
assertFalse(splitProcessor.isPreserveTrailing());
|
||||||
assertThat(splitProcessor.getTargetField(), equalTo("target"));
|
assertThat(splitProcessor.getTargetField(), equalTo("target"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCreateWithPreserveTrailing() throws Exception {
|
||||||
|
SplitProcessor.Factory factory = new SplitProcessor.Factory();
|
||||||
|
Map<String, Object> config = new HashMap<>();
|
||||||
|
config.put("field", "field1");
|
||||||
|
config.put("separator", "\\.");
|
||||||
|
config.put("target_field", "target");
|
||||||
|
config.put("preserve_trailing", true);
|
||||||
|
String processorTag = randomAlphaOfLength(10);
|
||||||
|
SplitProcessor splitProcessor = factory.create(null, processorTag, config);
|
||||||
|
assertThat(splitProcessor.getTag(), equalTo(processorTag));
|
||||||
|
assertThat(splitProcessor.getField(), equalTo("field1"));
|
||||||
|
assertThat(splitProcessor.getSeparator(), equalTo("\\."));
|
||||||
|
assertFalse(splitProcessor.isIgnoreMissing());
|
||||||
|
assertThat(splitProcessor.getTargetField(), equalTo("target"));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -39,7 +39,7 @@ public class SplitProcessorTests extends ESTestCase {
|
|||||||
public void testSplit() throws Exception {
|
public void testSplit() throws Exception {
|
||||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
|
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
|
||||||
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
|
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
|
||||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
|
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
|
||||||
processor.execute(ingestDocument);
|
processor.execute(ingestDocument);
|
||||||
assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
|
assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
|
||||||
}
|
}
|
||||||
@ -47,7 +47,7 @@ public class SplitProcessorTests extends ESTestCase {
|
|||||||
public void testSplitFieldNotFound() throws Exception {
|
public void testSplitFieldNotFound() throws Exception {
|
||||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
|
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
|
||||||
String fieldName = RandomDocumentPicks.randomFieldName(random());
|
String fieldName = RandomDocumentPicks.randomFieldName(random());
|
||||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
|
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
|
||||||
try {
|
try {
|
||||||
processor.execute(ingestDocument);
|
processor.execute(ingestDocument);
|
||||||
fail("split processor should have failed");
|
fail("split processor should have failed");
|
||||||
@ -59,7 +59,7 @@ public class SplitProcessorTests extends ESTestCase {
|
|||||||
public void testSplitNullValue() throws Exception {
|
public void testSplitNullValue() throws Exception {
|
||||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(),
|
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(),
|
||||||
Collections.singletonMap("field", null));
|
Collections.singletonMap("field", null));
|
||||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, "field");
|
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, false, "field");
|
||||||
try {
|
try {
|
||||||
processor.execute(ingestDocument);
|
processor.execute(ingestDocument);
|
||||||
fail("split processor should have failed");
|
fail("split processor should have failed");
|
||||||
@ -73,7 +73,7 @@ public class SplitProcessorTests extends ESTestCase {
|
|||||||
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
|
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
|
||||||
Collections.singletonMap(fieldName, null));
|
Collections.singletonMap(fieldName, null));
|
||||||
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
|
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
|
||||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, fieldName);
|
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, false, fieldName);
|
||||||
processor.execute(ingestDocument);
|
processor.execute(ingestDocument);
|
||||||
assertIngestDocument(originalIngestDocument, ingestDocument);
|
assertIngestDocument(originalIngestDocument, ingestDocument);
|
||||||
}
|
}
|
||||||
@ -81,7 +81,7 @@ public class SplitProcessorTests extends ESTestCase {
|
|||||||
public void testSplitNonExistentWithIgnoreMissing() throws Exception {
|
public void testSplitNonExistentWithIgnoreMissing() throws Exception {
|
||||||
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
|
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
|
||||||
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
|
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
|
||||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, "field");
|
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, false, "field");
|
||||||
processor.execute(ingestDocument);
|
processor.execute(ingestDocument);
|
||||||
assertIngestDocument(originalIngestDocument, ingestDocument);
|
assertIngestDocument(originalIngestDocument, ingestDocument);
|
||||||
}
|
}
|
||||||
@ -90,7 +90,7 @@ public class SplitProcessorTests extends ESTestCase {
|
|||||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
|
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
|
||||||
String fieldName = RandomDocumentPicks.randomFieldName(random());
|
String fieldName = RandomDocumentPicks.randomFieldName(random());
|
||||||
ingestDocument.setFieldValue(fieldName, randomInt());
|
ingestDocument.setFieldValue(fieldName, randomInt());
|
||||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
|
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
|
||||||
try {
|
try {
|
||||||
processor.execute(ingestDocument);
|
processor.execute(ingestDocument);
|
||||||
fail("split processor should have failed");
|
fail("split processor should have failed");
|
||||||
@ -121,8 +121,24 @@ public class SplitProcessorTests extends ESTestCase {
|
|||||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
|
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
|
||||||
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
|
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
|
||||||
String targetFieldName = fieldName + randomAlphaOfLength(5);
|
String targetFieldName = fieldName + randomAlphaOfLength(5);
|
||||||
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, targetFieldName);
|
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, targetFieldName);
|
||||||
processor.execute(ingestDocument);
|
processor.execute(ingestDocument);
|
||||||
assertThat(ingestDocument.getFieldValue(targetFieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
|
assertThat(ingestDocument.getFieldValue(targetFieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSplitWithPreserveTrailing() throws Exception {
|
||||||
|
doTestSplitWithPreserveTrailing(true, "foo|bar|baz||", Arrays.asList("foo", "bar", "baz", "", ""));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSplitWithoutPreserveTrailing() throws Exception {
|
||||||
|
doTestSplitWithPreserveTrailing(false, "foo|bar|baz||", Arrays.asList("foo", "bar", "baz"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doTestSplitWithPreserveTrailing(boolean preserveTrailing, String fieldValue, List<String> expected) throws Exception {
|
||||||
|
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
|
||||||
|
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, fieldValue);
|
||||||
|
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\|", false, preserveTrailing, fieldName);
|
||||||
|
processor.execute(ingestDocument);
|
||||||
|
assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(expected));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user