add `ignore_missing` flag to ingest plugins (#22273)

added `ignore_missing` flag to:

- Attachment Processor
- GeoIP Processor
- User-Agent Processor
This commit is contained in:
Tal Levy 2016-12-20 10:53:28 -08:00 committed by GitHub
parent ad4b1ecdeb
commit 5a90d9d7e6
13 changed files with 304 additions and 54 deletions

View File

@ -145,8 +145,24 @@ public final class IngestDocument {
* or if the field that is found at the provided path is not of the expected type.
*/
public byte[] getFieldValueAsBytes(String path) {
Object object = getFieldValue(path, Object.class);
if (object instanceof byte[]) {
return getFieldValueAsBytes(path, false);
}
/**
* Returns the value contained in the document for the provided path as a byte array.
* If the path value is a string, a base64 decode operation will happen.
* If the path value is a byte array, it is just returned
* @param path The path within the document in dot-notation
* @param ignoreMissing The flag to determine whether to throw an exception when `path` is not found in the document.
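* @return the byte array for the provided path if existing, or {@code null} if the value at the path is null
* (which includes a missing path when {@code ignoreMissing} is {@code true})
* @throws IllegalArgumentException if the path is null, empty, invalid, if the field doesn't exist
* (and {@code ignoreMissing} is {@code false}) or if the field that is found at the provided path is not of the expected type.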
*/
public byte[] getFieldValueAsBytes(String path, boolean ignoreMissing) {
Object object = getFieldValue(path, Object.class, ignoreMissing);
if (object == null) {
return null;
} else if (object instanceof byte[]) {
return (byte[]) object;
} else if (object instanceof String) {
return Base64.getDecoder().decode(object.toString());

View File

@ -53,6 +53,7 @@ The node must be stopped before removing the plugin.
| `target_field` | no | attachment | The field that will hold the attachment information
| `indexed_chars` | no | 100000 | The number of chars being used for extraction to prevent huge fields. Use `-1` for no limit.
| `properties` | no | all | Properties to select to be stored. Can be `content`, `title`, `name`, `author`, `keywords`, `date`, `content_type`, `content_length`, `language`
| `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
|======
For example, this:

View File

@ -54,6 +54,7 @@ The node must be stopped before removing the plugin.
| `target_field` | no | geoip | The field that will hold the geographical information looked up from the Maxmind database.
| `database_file` | no | GeoLite2-City.mmdb.gz | The database filename in the geoip config directory. The ingest-geoip plugin ships with the GeoLite2-City.mmdb.gz and GeoLite2-Country.mmdb.gz files.
| `properties` | no | [`continent_name`, `country_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the geoip lookup.
| `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
|======
*Depends on what is available in `database_file`:

View File

@ -43,11 +43,12 @@ The node must be stopped before removing the plugin.
.User-agent options
[options="header"]
|======
| Name | Required | Default | Description
| `field` | yes | - | The field containing the user agent string.
| `target_field` | no | user_agent | The field that will be filled with the user agent details.
| `regex_file` | no | - | The name of the file in the `config/ingest-user-agent` directory containing the regular expressions for parsing the user agent string. Both the directory and the file have to be created before starting Elasticsearch. If not specified, ingest-user-agent will use the regexes.yaml from uap-core it ships with (see below).
| Name | Required | Default | Description
| `field` | yes | - | The field containing the user agent string.
| `target_field` | no | user_agent | The field that will be filled with the user agent details.
| `regex_file` | no | - | The name of the file in the `config/ingest-user-agent` directory containing the regular expressions for parsing the user agent string. Both the directory and the file have to be created before starting Elasticsearch. If not specified, ingest-user-agent will use the regexes.yaml from uap-core it ships with (see below).
| `properties` | no | [`name`, `major`, `minor`, `patch`, `build`, `os`, `os_name`, `os_major`, `os_minor`, `device`] | Controls what properties are added to `target_field`.
| `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
|======
Here is an example that adds the user agent details to the `user_agent` field based on the `agent` field:

View File

@ -38,6 +38,7 @@ import java.util.Map;
import java.util.Set;
import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readIntProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
@ -52,23 +53,36 @@ public final class AttachmentProcessor extends AbstractProcessor {
private final String targetField;
private final Set<Property> properties;
private final int indexedChars;
private final boolean ignoreMissing;
AttachmentProcessor(String tag, String field, String targetField, Set<Property> properties,
int indexedChars) throws IOException {
int indexedChars, boolean ignoreMissing) throws IOException {
super(tag);
this.field = field;
this.targetField = targetField;
this.properties = properties;
this.indexedChars = indexedChars;
this.ignoreMissing = ignoreMissing;
}
/**
 * Exposes the {@code ignoreMissing} flag this processor was constructed with.
 *
 * @return {@code true} if the processor returns quietly when the source field yields no value
 */
boolean isIgnoreMissing() {
    return this.ignoreMissing;
}
@Override
public void execute(IngestDocument ingestDocument) {
Map<String, Object> additionalFields = new HashMap<>();
byte[] input = ingestDocument.getFieldValueAsBytes(field, ignoreMissing);
if (input == null && ignoreMissing) {
return;
} else if (input == null) {
throw new IllegalArgumentException("field [" + field + "] is null, cannot parse.");
}
try {
Metadata metadata = new Metadata();
byte[] input = ingestDocument.getFieldValueAsBytes(field);
String parsedContent = TikaImpl.parse(input, metadata, indexedChars);
if (properties.contains(Property.CONTENT) && Strings.hasLength(parsedContent)) {
@ -166,6 +180,7 @@ public final class AttachmentProcessor extends AbstractProcessor {
String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "attachment");
List<String> properyNames = readOptionalList(TYPE, processorTag, config, "properties");
int indexedChars = readIntProperty(TYPE, processorTag, config, "indexed_chars", NUMBER_OF_CHARS_INDEXED);
boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
final Set<Property> properties;
if (properyNames != null) {
@ -182,7 +197,7 @@ public final class AttachmentProcessor extends AbstractProcessor {
properties = DEFAULT_PROPERTIES;
}
return new AttachmentProcessor(processorTag, field, targetField, properties, indexedChars);
return new AttachmentProcessor(processorTag, field, targetField, properties, indexedChars, ignoreMissing);
}
}

View File

@ -52,6 +52,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getTargetField(), equalTo("attachment"));
assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES));
assertFalse(processor.isIgnoreMissing());
}
public void testConfigureIndexedChars() throws Exception {
@ -64,6 +65,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
AttachmentProcessor processor = factory.create(null, processorTag, config);
assertThat(processor.getTag(), equalTo(processorTag));
assertThat(processor.getIndexedChars(), is(indexedChars));
assertFalse(processor.isIgnoreMissing());
}
public void testBuildTargetField() throws Exception {
@ -73,6 +75,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
AttachmentProcessor processor = factory.create(null, null, config);
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getTargetField(), equalTo("_field"));
assertFalse(processor.isIgnoreMissing());
}
public void testBuildFields() throws Exception {
@ -90,6 +93,7 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
AttachmentProcessor processor = factory.create(null, null, config);
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getProperties(), equalTo(properties));
assertFalse(processor.isIgnoreMissing());
}
public void testBuildIllegalFieldOption() throws Exception {
@ -117,4 +121,19 @@ public class AttachmentProcessorFactoryTests extends ESTestCase {
assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]"));
}
}
/**
 * Builds a processor from a configuration that enables {@code ignore_missing} and checks that
 * the flag is propagated while every other setting keeps its default.
 */
public void testIgnoreMissing() throws Exception {
    // The factory consumes entries from the map, so it has to be mutable.
    Map<String, Object> settings = new HashMap<>();
    settings.put("field", "_field");
    settings.put("ignore_missing", true);

    String tag = randomAsciiOfLength(10);

    AttachmentProcessor built = factory.create(null, tag, settings);

    assertThat(built.getTag(), equalTo(tag));
    assertThat(built.getField(), equalTo("_field"));
    assertThat(built.getTargetField(), equalTo("attachment"));
    assertThat(built.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES));
    assertTrue(built.isIgnoreMissing());
}
}

View File

@ -22,6 +22,7 @@ package org.elasticsearch.ingest.attachment;
import org.apache.commons.io.IOUtils;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;
import org.elasticsearch.ingest.RandomDocumentPicks;
import org.elasticsearch.test.ESTestCase;
import org.junit.Before;
@ -30,14 +31,17 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;
@ -52,7 +56,7 @@ public class AttachmentProcessorTests extends ESTestCase {
@Before
public void createStandardProcessor() throws IOException {
processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field",
"target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 10000);
"target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 10000, false);
}
public void testEnglishTextDocument() throws Exception {
@ -85,7 +89,7 @@ public class AttachmentProcessorTests extends ESTestCase {
selectedProperties.add(AttachmentProcessor.Property.DATE);
}
processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field",
"target_field", selectedProperties, 10000);
"target_field", selectedProperties, 10000, false);
Map<String, Object> attachmentData = parseDocument("htmlWithEmptyDateMeta.html", processor);
assertThat(attachmentData.keySet(), hasSize(selectedFieldNames.length));
@ -199,6 +203,40 @@ public class AttachmentProcessorTests extends ESTestCase {
assertThat(attachmentData.get("content_length"), is(notNullValue()));
}
/**
 * A source field that is present but {@code null} must leave the document untouched
 * when the processor is created with {@code ignoreMissing} enabled.
 */
public void testNullValueWithIgnoreMissing() throws Exception {
    Map<String, Object> source = Collections.singletonMap("source_field", null);
    IngestDocument expected = RandomDocumentPicks.randomIngestDocument(random(), source);
    // Work on a copy so the untouched original can serve as the expected state.
    IngestDocument actual = new IngestDocument(expected);

    String tag = randomAsciiOfLength(10);
    Processor lenient = new AttachmentProcessor(tag, "source_field", "randomTarget", null, 10, true);
    lenient.execute(actual);

    assertIngestDocument(expected, actual);
}
/**
 * A document that lacks the source field entirely must pass through unchanged
 * when the processor is created with {@code ignoreMissing} enabled.
 */
public void testNonExistentWithIgnoreMissing() throws Exception {
    Map<String, Object> source = Collections.emptyMap();
    IngestDocument expected = RandomDocumentPicks.randomIngestDocument(random(), source);
    // Work on a copy so the untouched original can serve as the expected state.
    IngestDocument actual = new IngestDocument(expected);

    String tag = randomAsciiOfLength(10);
    Processor lenient = new AttachmentProcessor(tag, "source_field", "randomTarget", null, 10, true);
    lenient.execute(actual);

    assertIngestDocument(expected, actual);
}
/**
 * A {@code null} source field must be rejected when {@code ignoreMissing} is disabled:
 * the processor fails with an {@link IllegalArgumentException} naming the field and
 * leaves the document as it was.
 */
public void testNullWithoutIgnoreMissing() throws Exception {
    IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
        Collections.singletonMap("source_field", null));
    IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
    Processor processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", "randomTarget", null, 10, false);
    // Pin the concrete exception type rather than the catch-all Exception, so the test documents
    // the actual contract and cannot be satisfied by an unrelated failure type.
    IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument));
    assertThat(exception.getMessage(), equalTo("field [source_field] is null, cannot parse."));
    // The rejection happens before any field is written, so the document must be unchanged.
    assertIngestDocument(originalIngestDocument, ingestDocument);
}
/**
 * A document without the source field must be rejected when {@code ignoreMissing} is disabled:
 * the field lookup fails with an {@link IllegalArgumentException} and the document stays as it was.
 */
public void testNonExistentWithoutIgnoreMissing() throws Exception {
    IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
    IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
    Processor processor = new AttachmentProcessor(randomAsciiOfLength(10), "source_field", "randomTarget", null, 10, false);
    // Pin the concrete exception type rather than the catch-all Exception, so the test documents
    // the actual contract and cannot be satisfied by an unrelated failure type.
    IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument));
    assertThat(exception.getMessage(), equalTo("field [source_field] not present as part of path [source_field]"));
    // The lookup fails before any field is written, so the document must be unchanged.
    assertIngestDocument(originalIngestDocument, ingestDocument);
}
private Map<String, Object> parseDocument(String file, AttachmentProcessor processor) throws Exception {
Map<String, Object> document = new HashMap<>();
document.put("source_field", getAsBase64(file));

View File

@ -50,6 +50,7 @@ import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;
import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
@ -63,18 +64,32 @@ public final class GeoIpProcessor extends AbstractProcessor {
private final String targetField;
private final DatabaseReader dbReader;
private final Set<Property> properties;
private final boolean ignoreMissing;
GeoIpProcessor(String tag, String field, DatabaseReader dbReader, String targetField, Set<Property> properties) throws IOException {
GeoIpProcessor(String tag, String field, DatabaseReader dbReader, String targetField, Set<Property> properties,
boolean ignoreMissing) throws IOException {
super(tag);
this.field = field;
this.targetField = targetField;
this.dbReader = dbReader;
this.properties = properties;
this.ignoreMissing = ignoreMissing;
}
/**
 * Exposes the {@code ignoreMissing} flag this processor was constructed with.
 *
 * @return {@code true} if the processor returns quietly when the source field yields no value
 */
boolean isIgnoreMissing() {
    return this.ignoreMissing;
}
@Override
public void execute(IngestDocument ingestDocument) {
String ip = ingestDocument.getFieldValue(field, String.class);
String ip = ingestDocument.getFieldValue(field, String.class, ignoreMissing);
if (ip == null && ignoreMissing) {
return;
} else if (ip == null) {
throw new IllegalArgumentException("field [" + field + "] is null, cannot extract geoip information.");
}
final InetAddress ipAddress = InetAddresses.forString(ip);
Map<String, Object> geoData;
@ -268,6 +283,7 @@ public final class GeoIpProcessor extends AbstractProcessor {
String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "geoip");
String databaseFile = readStringProperty(TYPE, processorTag, config, "database_file", "GeoLite2-City.mmdb.gz");
List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
DatabaseReader databaseReader = databaseReaders.get(databaseFile);
if (databaseReader == null) {
@ -298,7 +314,7 @@ public final class GeoIpProcessor extends AbstractProcessor {
}
}
return new GeoIpProcessor(processorTag, ipField, databaseReader, targetField, properties);
return new GeoIpProcessor(processorTag, ipField, databaseReader, targetField, properties, ignoreMissing);
}
}

View File

@ -85,6 +85,24 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
assertThat(processor.getTargetField(), equalTo("geoip"));
assertThat(processor.getDbReader().getMetadata().getDatabaseType(), equalTo("GeoLite2-City"));
assertThat(processor.getProperties(), sameInstance(GeoIpProcessor.Factory.DEFAULT_CITY_PROPERTIES));
assertFalse(processor.isIgnoreMissing());
}
/**
 * Builds a processor from a configuration that enables {@code ignore_missing} and checks that
 * the flag is propagated while the remaining settings keep their city-database defaults.
 */
public void testSetIgnoreMissing() throws Exception {
    GeoIpProcessor.Factory geoFactory = new GeoIpProcessor.Factory(databaseReaders);

    // The factory consumes entries from the map, so it has to be mutable.
    Map<String, Object> settings = new HashMap<>();
    settings.put("field", "_field");
    settings.put("ignore_missing", true);

    String tag = randomAsciiOfLength(10);

    GeoIpProcessor built = geoFactory.create(null, tag, settings);

    assertThat(built.getTag(), equalTo(tag));
    assertThat(built.getField(), equalTo("_field"));
    assertThat(built.getTargetField(), equalTo("geoip"));
    assertThat(built.getDbReader().getMetadata().getDatabaseType(), equalTo("GeoLite2-City"));
    assertThat(built.getProperties(), sameInstance(GeoIpProcessor.Factory.DEFAULT_CITY_PROPERTIES));
    assertTrue(built.isIgnoreMissing());
}
public void testCountryBuildDefaults() throws Exception {
@ -102,6 +120,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
assertThat(processor.getTargetField(), equalTo("geoip"));
assertThat(processor.getDbReader().getMetadata().getDatabaseType(), equalTo("GeoLite2-Country"));
assertThat(processor.getProperties(), sameInstance(GeoIpProcessor.Factory.DEFAULT_COUNTRY_PROPERTIES));
assertFalse(processor.isIgnoreMissing());
}
public void testBuildTargetField() throws Exception {
@ -112,6 +131,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
GeoIpProcessor processor = factory.create(null, null, config);
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getTargetField(), equalTo("_field"));
assertFalse(processor.isIgnoreMissing());
}
public void testBuildDbFile() throws Exception {
@ -124,6 +144,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
assertThat(processor.getTargetField(), equalTo("geoip"));
assertThat(processor.getDbReader().getMetadata().getDatabaseType(), equalTo("GeoLite2-Country"));
assertThat(processor.getProperties(), sameInstance(GeoIpProcessor.Factory.DEFAULT_COUNTRY_PROPERTIES));
assertFalse(processor.isIgnoreMissing());
}
public void testBuildWithCountryDbAndCityFields() throws Exception {
@ -174,6 +195,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
GeoIpProcessor processor = factory.create(null, null, config);
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getProperties(), equalTo(properties));
assertFalse(processor.isIgnoreMissing());
}
public void testBuildIllegalFieldOption() throws Exception {

View File

@ -20,17 +20,20 @@
package org.elasticsearch.ingest.geoip;
import com.maxmind.geoip2.DatabaseReader;
import org.elasticsearch.ingest.Processor;
import org.elasticsearch.ingest.RandomDocumentPicks;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
@ -40,7 +43,7 @@ public class GeoIpProcessorTests extends ESTestCase {
public void testCity() throws Exception {
InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
Map<String, Object> document = new HashMap<>();
document.put("source_field", "8.8.8.8");
@ -64,10 +67,52 @@ public class GeoIpProcessorTests extends ESTestCase {
assertThat(geoData.get("location"), equalTo(location));
}
/**
 * A source field that is present but {@code null} must leave the document untouched
 * when the processor is created with {@code ignoreMissing} enabled.
 */
public void testNullValueWithIgnoreMissing() throws Exception {
    InputStream cityDb = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
    String tag = randomAsciiOfLength(10);
    DatabaseReader reader = new DatabaseReader.Builder(cityDb).build();
    GeoIpProcessor lenient = new GeoIpProcessor(tag, "source_field", reader, "target_field",
        EnumSet.allOf(GeoIpProcessor.Property.class), true);

    Map<String, Object> source = Collections.singletonMap("source_field", null);
    IngestDocument expected = RandomDocumentPicks.randomIngestDocument(random(), source);
    // Work on a copy so the untouched original can serve as the expected state.
    IngestDocument actual = new IngestDocument(expected);

    lenient.execute(actual);

    assertIngestDocument(expected, actual);
}
/**
 * A document that lacks the source field entirely must pass through unchanged
 * when the processor is created with {@code ignoreMissing} enabled.
 */
public void testNonExistentWithIgnoreMissing() throws Exception {
    InputStream cityDb = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
    String tag = randomAsciiOfLength(10);
    DatabaseReader reader = new DatabaseReader.Builder(cityDb).build();
    GeoIpProcessor lenient = new GeoIpProcessor(tag, "source_field", reader, "target_field",
        EnumSet.allOf(GeoIpProcessor.Property.class), true);

    Map<String, Object> source = Collections.emptyMap();
    IngestDocument expected = RandomDocumentPicks.randomIngestDocument(random(), source);
    // Work on a copy so the untouched original can serve as the expected state.
    IngestDocument actual = new IngestDocument(expected);

    lenient.execute(actual);

    assertIngestDocument(expected, actual);
}
/**
 * A {@code null} source field must be rejected when {@code ignoreMissing} is disabled:
 * the processor fails with an {@link IllegalArgumentException} naming the field and
 * leaves the document as it was.
 */
public void testNullWithoutIgnoreMissing() throws Exception {
    InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
    GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
        new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
    IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
        Collections.singletonMap("source_field", null));
    IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
    // Pin the concrete exception type rather than the catch-all Exception, so the test documents
    // the actual contract and cannot be satisfied by an unrelated failure type.
    IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument));
    assertThat(exception.getMessage(), equalTo("field [source_field] is null, cannot extract geoip information."));
    // The rejection happens before any field is written, so the document must be unchanged.
    assertIngestDocument(originalIngestDocument, ingestDocument);
}
/**
 * A document without the source field must be rejected when {@code ignoreMissing} is disabled:
 * the field lookup fails with an {@link IllegalArgumentException} and the document stays as it was.
 */
public void testNonExistentWithoutIgnoreMissing() throws Exception {
    InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
    GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
        new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
    IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
    IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
    // Pin the concrete exception type rather than the catch-all Exception, so the test documents
    // the actual contract and cannot be satisfied by an unrelated failure type.
    IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument));
    assertThat(exception.getMessage(), equalTo("field [source_field] not present as part of path [source_field]"));
    // The lookup fails before any field is written, so the document must be unchanged.
    assertIngestDocument(originalIngestDocument, ingestDocument);
}
public void testCity_withIpV6() throws Exception {
InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
String address = "2602:306:33d3:8000::3257:9652";
Map<String, Object> document = new HashMap<>();
@ -95,7 +140,7 @@ public class GeoIpProcessorTests extends ESTestCase {
public void testCityWithMissingLocation() throws Exception {
InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
Map<String, Object> document = new HashMap<>();
document.put("source_field", "93.114.45.13");
@ -112,7 +157,7 @@ public class GeoIpProcessorTests extends ESTestCase {
public void testCountry() throws Exception {
InputStream database = getDatabaseFileInputStream("/GeoLite2-Country.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
Map<String, Object> document = new HashMap<>();
document.put("source_field", "82.170.213.79");
@ -132,7 +177,7 @@ public class GeoIpProcessorTests extends ESTestCase {
public void testCountryWithMissingLocation() throws Exception {
InputStream database = getDatabaseFileInputStream("/GeoLite2-Country.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
Map<String, Object> document = new HashMap<>();
document.put("source_field", "93.114.45.13");
@ -149,7 +194,7 @@ public class GeoIpProcessorTests extends ESTestCase {
public void testAddressIsNotInTheDatabase() throws Exception {
InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
Map<String, Object> document = new HashMap<>();
document.put("source_field", "127.0.0.1");
@ -162,7 +207,7 @@ public class GeoIpProcessorTests extends ESTestCase {
public void testInvalid() throws Exception {
InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field",
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class), false);
Map<String, Object> document = new HashMap<>();
document.put("source_field", "www.google.com");

View File

@ -34,6 +34,7 @@ import java.util.Map;
import java.util.Set;
import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
@ -44,20 +45,32 @@ public class UserAgentProcessor extends AbstractProcessor {
private final String field;
private final String targetField;
private final Set<Property> properties;
private final UserAgentParser parser;
private final boolean ignoreMissing;
public UserAgentProcessor(String tag, String field, String targetField, UserAgentParser parser, Set<Property> properties) {
public UserAgentProcessor(String tag, String field, String targetField, UserAgentParser parser, Set<Property> properties,
boolean ignoreMissing) {
super(tag);
this.field = field;
this.targetField = targetField;
this.parser = parser;
this.properties = properties;
this.ignoreMissing = ignoreMissing;
}
/**
 * Exposes the {@code ignoreMissing} flag this processor was constructed with.
 *
 * @return {@code true} if the processor returns quietly when the source field yields no value
 */
boolean isIgnoreMissing() {
    return this.ignoreMissing;
}
@Override
public void execute(IngestDocument ingestDocument) throws Exception {
String userAgent = ingestDocument.getFieldValue(field, String.class);
String userAgent = ingestDocument.getFieldValue(field, String.class, ignoreMissing);
if (userAgent == null && ignoreMissing) {
return;
} else if (userAgent == null) {
throw new IllegalArgumentException("field [" + field + "] is null, cannot parse user-agent.");
}
Details uaClient = parser.parse(userAgent);
@ -99,7 +112,7 @@ public class UserAgentProcessor extends AbstractProcessor {
else {
uaDetails.put("os", "Other");
}
break;
case OS_NAME:
if (uaClient.operatingSystem != null && uaClient.operatingSystem.name != null) {
@ -168,7 +181,7 @@ public class UserAgentProcessor extends AbstractProcessor {
public String getType() {
return TYPE;
}
String getField() {
return field;
}
@ -180,7 +193,7 @@ public class UserAgentProcessor extends AbstractProcessor {
Set<Property> getProperties() {
return properties;
}
UserAgentParser getUaParser() {
return parser;
}
@ -188,7 +201,7 @@ public class UserAgentProcessor extends AbstractProcessor {
public static final class Factory implements Processor.Factory {
private final Map<String, UserAgentParser> userAgentParsers;
public Factory(Map<String, UserAgentParser> userAgentParsers) {
this.userAgentParsers = userAgentParsers;
}
@ -200,13 +213,14 @@ public class UserAgentProcessor extends AbstractProcessor {
String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "user_agent");
String regexFilename = readStringProperty(TYPE, processorTag, config, "regex_file", IngestUserAgentPlugin.DEFAULT_PARSER_NAME);
List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
UserAgentParser parser = userAgentParsers.get(regexFilename);
if (parser == null) {
throw newConfigurationException(TYPE, processorTag,
"regex_file", "regex file [" + regexFilename + "] doesn't exist (has to exist at node startup)");
}
final Set<Property> properties;
if (propertyNames != null) {
properties = EnumSet.noneOf(Property.class);
@ -221,7 +235,7 @@ public class UserAgentProcessor extends AbstractProcessor {
properties = EnumSet.allOf(Property.class);
}
return new UserAgentProcessor(processorTag, field, targetField, parser, properties);
return new UserAgentProcessor(processorTag, field, targetField, parser, properties, ignoreMissing);
}
}

View File

@ -89,6 +89,27 @@ public class UserAgentProcessorFactoryTests extends ESTestCase {
assertThat(processor.getUaParser().getOsPatterns().size(), greaterThan(0));
assertThat(processor.getUaParser().getDevicePatterns().size(), greaterThan(0));
assertThat(processor.getProperties(), equalTo(EnumSet.allOf(UserAgentProcessor.Property.class)));
assertFalse(processor.isIgnoreMissing());
}
/**
 * Builds a processor from a configuration that enables {@code ignore_missing} and checks that
 * the flag is propagated while the remaining settings keep their defaults.
 */
public void testBuildWithIgnoreMissing() throws Exception {
    UserAgentProcessor.Factory uaFactory = new UserAgentProcessor.Factory(userAgentParsers);

    // The factory consumes entries from the map, so it has to be mutable.
    Map<String, Object> settings = new HashMap<>();
    settings.put("field", "_field");
    settings.put("ignore_missing", true);

    String tag = randomAsciiOfLength(10);

    UserAgentProcessor built = uaFactory.create(null, tag, settings);

    assertThat(built.getTag(), equalTo(tag));
    assertThat(built.getField(), equalTo("_field"));
    assertThat(built.getTargetField(), equalTo("user_agent"));
    // The default parser must have loaded all three pattern groups.
    assertThat(built.getUaParser().getUaPatterns().size(), greaterThan(0));
    assertThat(built.getUaParser().getOsPatterns().size(), greaterThan(0));
    assertThat(built.getUaParser().getDevicePatterns().size(), greaterThan(0));
    assertThat(built.getProperties(), equalTo(EnumSet.allOf(UserAgentProcessor.Property.class)));
    assertTrue(built.isIgnoreMissing());
}
public void testBuildTargetField() throws Exception {

View File

@ -27,55 +27,96 @@ import org.junit.BeforeClass;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasKey;
import static org.hamcrest.Matchers.is;
public class UserAgentProcessorTests extends ESTestCase {
private static UserAgentProcessor processor;
/**
 * Creates the {@code processor} instance that the parsing tests in this class call.
 * The regex definitions are read from the {@code /regexes.yaml} classpath resource (asserted to be
 * present), wrapped in a {@link UserAgentParser} with a randomly generated name and a
 * {@code UserAgentCache} constructed with 1000, and the processor is configured with
 * {@code source_field} / {@code target_field} and every {@link UserAgentProcessor.Property}.
 *
 * @throws IOException declared by the signature; presumably raised while reading the regex stream - TODO confirm
 */
@BeforeClass
public static void setupProcessor() throws IOException {
InputStream regexStream = UserAgentProcessor.class.getResourceAsStream("/regexes.yaml");
assertNotNull(regexStream);
UserAgentParser parser = new UserAgentParser(randomAsciiOfLength(10), regexStream, new UserAgentCache(1000));
processor = new UserAgentProcessor(randomAsciiOfLength(10), "source_field", "target_field", parser,
// NOTE(review): the two lines below look like the pre-change and post-change renderings of the
// same argument line from a diff view. Only the second one (ending in ", false") matches the
// six-argument constructor that the other tests in this class call - confirm against the real file.
EnumSet.allOf(UserAgentProcessor.Property.class));
EnumSet.allOf(UserAgentProcessor.Property.class), false);
}
/**
 * With the ignore-missing flag set to {@code true}, executing the processor on a document whose
 * {@code source_field} maps to {@code null} must leave the document equal to its original.
 */
public void testNullValueWithIgnoreMissing() throws Exception {
    // A null parser is passed on purpose; the test expects execute() to complete without touching the document.
    String tag = randomAsciiOfLength(10);
    UserAgentProcessor lenientProcessor = new UserAgentProcessor(tag, "source_field", "target_field", null,
        EnumSet.allOf(UserAgentProcessor.Property.class), true);

    Map<String, Object> source = Collections.singletonMap("source_field", null);
    IngestDocument expected = RandomDocumentPicks.randomIngestDocument(random(), source);
    IngestDocument actual = new IngestDocument(expected);

    lenientProcessor.execute(actual);

    assertIngestDocument(expected, actual);
}
/**
 * With the ignore-missing flag set to {@code true}, executing the processor on a document that
 * was built without a {@code source_field} entry must leave the document equal to its original.
 */
public void testNonExistentWithIgnoreMissing() throws Exception {
    // A null parser is passed on purpose; the test expects execute() to complete without touching the document.
    String tag = randomAsciiOfLength(10);
    UserAgentProcessor lenientProcessor = new UserAgentProcessor(tag, "source_field", "target_field", null,
        EnumSet.allOf(UserAgentProcessor.Property.class), true);

    Map<String, Object> source = Collections.emptyMap();
    IngestDocument expected = RandomDocumentPicks.randomIngestDocument(random(), source);
    IngestDocument actual = new IngestDocument(expected);

    lenientProcessor.execute(actual);

    assertIngestDocument(expected, actual);
}
/**
 * With the ignore-missing flag set to {@code false}, executing the processor on a document whose
 * {@code source_field} maps to {@code null} must fail with the "is null" message asserted below.
 */
public void testNullWithoutIgnoreMissing() throws Exception {
    String tag = randomAsciiOfLength(10);
    UserAgentProcessor strictProcessor = new UserAgentProcessor(tag, "source_field", "target_field", null,
        EnumSet.allOf(UserAgentProcessor.Property.class), false);

    Map<String, Object> source = Collections.singletonMap("source_field", null);
    IngestDocument template = RandomDocumentPicks.randomIngestDocument(random(), source);
    IngestDocument copy = new IngestDocument(template);

    Exception thrown = expectThrows(Exception.class, () -> strictProcessor.execute(copy));

    assertThat(thrown.getMessage(), equalTo("field [source_field] is null, cannot parse user-agent."));
}
/**
 * With the ignore-missing flag set to {@code false}, executing the processor on a document that
 * was built without a {@code source_field} entry must fail with the "not present" message asserted below.
 */
public void testNonExistentWithoutIgnoreMissing() throws Exception {
    String tag = randomAsciiOfLength(10);
    UserAgentProcessor strictProcessor = new UserAgentProcessor(tag, "source_field", "target_field", null,
        EnumSet.allOf(UserAgentProcessor.Property.class), false);

    Map<String, Object> source = Collections.emptyMap();
    IngestDocument template = RandomDocumentPicks.randomIngestDocument(random(), source);
    IngestDocument copy = new IngestDocument(template);

    Exception thrown = expectThrows(Exception.class, () -> strictProcessor.execute(copy));

    assertThat(thrown.getMessage(), equalTo("field [source_field] not present as part of path [source_field]"));
}
/**
 * Runs the shared processor on a desktop Chrome-on-macOS user-agent string and checks every
 * value written under {@code target_field}.
 */
@SuppressWarnings("unchecked")
public void testCommonBrowser() throws Exception {
    Map<String, Object> source = new HashMap<>();
    source.put("source_field",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36");
    IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), source);

    processor.execute(doc);

    Map<String, Object> sourceAndMetadata = doc.getSourceAndMetadata();
    assertThat(sourceAndMetadata, hasKey("target_field"));
    Map<String, Object> userAgent = (Map<String, Object>) sourceAndMetadata.get("target_field");

    // Browser name and version components.
    assertThat(userAgent.get("name"), is("Chrome"));
    assertThat(userAgent.get("major"), is("33"));
    assertThat(userAgent.get("minor"), is("0"));
    assertThat(userAgent.get("patch"), is("1750"));
    assertNull(userAgent.get("build"));

    // Operating system.
    assertThat(userAgent.get("os"), is("Mac OS X 10.9.2"));
    assertThat(userAgent.get("os_name"), is("Mac OS X"));
    assertThat(userAgent.get("os_major"), is("10"));
    assertThat(userAgent.get("os_minor"), is("9"));

    // Device.
    assertThat(userAgent.get("device"), is("Other"));
}
/**
 * Runs the shared processor on an Android tablet user-agent string (Motorola Xoom) and checks
 * every value written under {@code target_field}: browser name/version, OS name/version and device.
 */
@SuppressWarnings("unchecked")
public void testUncommonDevice() throws Exception {
Map<String, Object> document = new HashMap<>();
// NOTE(review): the next line is a diff hunk header, not Java; the lines it elides are not visible
// here (presumably the start of the document.put("source_field", ...) call) - confirm against the real file.
@ -83,78 +124,78 @@ public class UserAgentProcessorTests extends ESTestCase {
"Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10+ "
+ "(KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2");
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
processor.execute(ingestDocument);
Map<String, Object> data = ingestDocument.getSourceAndMetadata();
// The processor output is expected as a nested map under the configured target field.
assertThat(data, hasKey("target_field"));
Map<String, Object> target = (Map<String, Object>) data.get("target_field");
assertThat(target.get("name"), is("Android"));
assertThat(target.get("major"), is("3"));
assertThat(target.get("minor"), is("0"));
// No patch or build value is expected for this user-agent string.
assertNull(target.get("patch"));
assertNull(target.get("build"));
assertThat(target.get("os"), is("Android 3.0"));
assertThat(target.get("os_name"), is("Android"));
assertThat(target.get("os_major"), is("3"));
assertThat(target.get("os_minor"), is("0"));
assertThat(target.get("device"), is("Motorola Xoom"));
}
/**
 * Runs the shared processor on a crawler user-agent string (EasouSpider) and checks every value
 * written under {@code target_field}; version fields are expected to be absent and the device
 * is expected to be reported as {@code Spider}.
 */
@SuppressWarnings("unchecked")
public void testSpider() throws Exception {
    Map<String, Object> source = new HashMap<>();
    source.put("source_field",
        "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)");
    IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), source);

    processor.execute(doc);

    Map<String, Object> sourceAndMetadata = doc.getSourceAndMetadata();
    assertThat(sourceAndMetadata, hasKey("target_field"));
    Map<String, Object> userAgent = (Map<String, Object>) sourceAndMetadata.get("target_field");

    // Agent name is recognised, but no version component is expected.
    assertThat(userAgent.get("name"), is("EasouSpider"));
    assertNull(userAgent.get("major"));
    assertNull(userAgent.get("minor"));
    assertNull(userAgent.get("patch"));
    assertNull(userAgent.get("build"));

    // Operating system falls back to "Other" with no version.
    assertThat(userAgent.get("os"), is("Other"));
    assertThat(userAgent.get("os_name"), is("Other"));
    assertNull(userAgent.get("os_major"));
    assertNull(userAgent.get("os_minor"));

    // Device.
    assertThat(userAgent.get("device"), is("Spider"));
}
/**
 * Runs the shared processor on a made-up user-agent string and checks that name, OS and device
 * are all reported as {@code Other} with every version field absent.
 */
@SuppressWarnings("unchecked")
public void testUnknown() throws Exception {
    Map<String, Object> source = new HashMap<>();
    source.put("source_field",
        "Something I made up v42.0.1");
    IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random(), source);

    processor.execute(doc);

    Map<String, Object> sourceAndMetadata = doc.getSourceAndMetadata();
    assertThat(sourceAndMetadata, hasKey("target_field"));
    Map<String, Object> userAgent = (Map<String, Object>) sourceAndMetadata.get("target_field");

    // Nothing in the string is expected to match, so the agent is "Other" with no version parts.
    assertThat(userAgent.get("name"), is("Other"));
    assertNull(userAgent.get("major"));
    assertNull(userAgent.get("minor"));
    assertNull(userAgent.get("patch"));
    assertNull(userAgent.get("build"));

    // Operating system likewise.
    assertThat(userAgent.get("os"), is("Other"));
    assertThat(userAgent.get("os_name"), is("Other"));
    assertNull(userAgent.get("os_major"));
    assertNull(userAgent.get("os_minor"));

    // Device.
    assertThat(userAgent.get("device"), is("Other"));
}
}