Add ingest-useragent plugin (#19074)

This commit is contained in:
Christoph Wurm 2016-07-01 15:49:43 +02:00 committed by GitHub
parent 27e320d5ce
commit 42addb5692
15 changed files with 6112 additions and 0 deletions

View File

@ -0,0 +1,74 @@
[[ingest-useragent]]
=== Ingest Useragent Processor Plugin
The Useragent processor extracts details from the user agent string a browser sends with its web requests.
This processor adds this information by default under the `useragent` field.
The ingest-useragent plugin ships by default with the regexes.yaml made available by uap-core under an Apache 2.0 license. For more details see https://github.com/ua-parser/uap-core.
[[ingest-useragent-install]]
[float]
==== Installation
This plugin can be installed using the plugin manager:
[source,sh]
----------------------------------------------------------------
sudo bin/elasticsearch-plugin install ingest-useragent
----------------------------------------------------------------
The plugin must be installed on every node in the cluster, and each node must
be restarted after installation.
[[ingest-useragent-remove]]
[float]
==== Removal
The plugin can be removed with the following command:
[source,sh]
----------------------------------------------------------------
sudo bin/elasticsearch-plugin remove ingest-useragent
----------------------------------------------------------------
The node must be stopped before removing the plugin.
[[using-ingest-useragent]]
==== Using the Useragent Processor in a Pipeline
[[ingest-useragent-options]]
.Useragent options
[options="header"]
|======
| Name | Required | Default | Description
| `field` | yes | - | The field containing the user agent string.
| `target_field` | no | useragent | The field that will be filled with the user agent details.
| `regex_file` | no | - | The name of the file in the `config/ingest-useragent` directory containing the regular expressions for parsing the user agent string. Both the directory and the file have to be created before starting Elasticsearch. If not specified, ingest-useragent will use the regexes.yaml from uap-core it ships with (see below).
| `properties` | no | [`name`, `major`, `minor`, `patch`, `build`, `os`, `os_name`, `os_major`, `os_minor`, `device`] | Controls what properties are added to `target_field`.
|======
Here is an example that adds the user agent details to the `useragent` field based on the `agent` field:
[source,js]
--------------------------------------------------
{
"description" : "...",
"processors" : [
{
"useragent" : {
"field" : "agent"
}
}
]
}
--------------------------------------------------
===== Using a custom regex file
To use a custom regex file for parsing the user agents, that file has to be put into the `config/ingest-useragent` directory and
has to have a `.yaml` filename extension. The file has to be present at node startup; changes to it, or new files added
while the node is running, will not have any effect.
In practice, it will make most sense for any custom regex file to be a variant of the default file, either a more recent version
or a customised version.
The default file included in `ingest-useragent` is the `regexes.yaml` from uap-core: https://github.com/ua-parser/uap-core/blob/master/regexes.yaml

View File

@ -0,0 +1,29 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
// Plugin descriptor: the human-readable description and the fully qualified
// name of the class that implements the plugin.
esplugin {
description 'Ingest processor that extracts information from a user agent'
classname 'org.elasticsearch.ingest.useragent.IngestUserAgentPlugin'
}
// Integration test cluster configuration.
integTest {
cluster {
// Makes 'test/test-regexes.yaml' available to the test cluster as
// 'ingest-useragent/test-regexes.yaml'.
// NOTE(review): presumably the first argument is the destination relative to the
// node's config directory and the second is the source file - confirm against the
// build plugin's extraConfigFile definition.
extraConfigFile 'ingest-useragent/test-regexes.yaml', 'test/test-regexes.yaml'
}
}

View File

@ -0,0 +1,86 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.useragent;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.node.NodeModule;
import org.elasticsearch.plugins.Plugin;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.StandardOpenOption;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;
/**
 * Plugin that registers the {@code useragent} ingest processor.
 *
 * At node startup it builds one {@link UserAgentParser} from the {@code /regexes.yaml} classpath
 * resource (registered under {@link #DEFAULT_PARSER_NAME}) plus one parser for every
 * {@code *.yaml} file found directly inside the {@code ingest-useragent} directory of the node's
 * config directory. All parsers share a single {@link UserAgentCache}.
 */
public class IngestUserAgentPlugin extends Plugin {

    /** Node-scoped setting for the size of the shared cache; defaults to 1000, minimum 0. */
    private static final Setting<Long> CACHE_SIZE_SETTING = Setting.longSetting("ingest.useragent.cache_size", 1000, 0,
            Setting.Property.NodeScope);

    /** Name under which the parser built from the bundled {@code /regexes.yaml} is registered. */
    static final String DEFAULT_PARSER_NAME = "_default_";

    /**
     * Creates the parsers and registers the processor factory on the node module.
     *
     * @param nodeModule the node module used to look up the environment, settings and processor registry
     * @throws IOException if a regex file cannot be read
     * @throws IllegalStateException if the {@code ingest-useragent} config path exists but is not a directory
     */
    public void onModule(NodeModule nodeModule) throws IOException {
        Path userAgentConfigDirectory = nodeModule.getNode().getEnvironment().configFile().resolve("ingest-useragent");

        // The directory is optional (createUserAgentParsers falls back to the bundled regexes only),
        // so a missing directory is fine. A path that exists but is not a directory is a
        // misconfiguration that would otherwise be silently ignored. The previous check
        // ("does not exist AND is a directory") could never be true.
        if (Files.exists(userAgentConfigDirectory) && Files.isDirectory(userAgentConfigDirectory) == false) {
            throw new IllegalStateException(
                "the user agent path [" + userAgentConfigDirectory + "] that should contain the regex files is not a directory");
        }

        long cacheSize = CACHE_SIZE_SETTING.get(nodeModule.getNode().settings());

        UserAgentCache cache = new UserAgentCache(cacheSize);

        Map<String, UserAgentParser> userAgentParsers = createUserAgentParsers(userAgentConfigDirectory, cache);

        nodeModule.registerProcessor(UserAgentProcessor.TYPE, (registry) -> new UserAgentProcessor.Factory(userAgentParsers));
    }

    /**
     * Builds the parser registry, keyed by parser name.
     *
     * The default parser is always present. Custom parsers are keyed by the file name
     * (including the {@code .yaml} extension) of the regex file they were built from.
     *
     * @param userAgentConfigDirectory directory to scan for custom {@code *.yaml} regex files; may not exist
     * @param cache cache handed to every parser
     * @return an unmodifiable map from parser name to parser
     * @throws IOException if listing the directory or reading a regex file fails
     */
    static Map<String, UserAgentParser> createUserAgentParsers(Path userAgentConfigDirectory, UserAgentCache cache) throws IOException {
        Map<String, UserAgentParser> userAgentParsers = new HashMap<>();

        // Close the classpath stream once the parser has consumed it; it used to be left open.
        try (InputStream defaultRegexStream = IngestUserAgentPlugin.class.getResourceAsStream("/regexes.yaml")) {
            userAgentParsers.put(DEFAULT_PARSER_NAME, new UserAgentParser(DEFAULT_PARSER_NAME, defaultRegexStream, cache));
        }

        if (Files.exists(userAgentConfigDirectory) && Files.isDirectory(userAgentConfigDirectory)) {
            PathMatcher pathMatcher = userAgentConfigDirectory.getFileSystem().getPathMatcher("glob:**.yaml");

            // Depth 1: only regular files directly inside the directory are considered.
            try (Stream<Path> regexFiles = Files.find(userAgentConfigDirectory, 1,
                    (path, attr) -> attr.isRegularFile() && pathMatcher.matches(path))) {
                Iterable<Path> iterable = regexFiles::iterator;
                for (Path path : iterable) {
                    String parserName = path.getFileName().toString();
                    try (InputStream regexStream = Files.newInputStream(path, StandardOpenOption.READ)) {
                        userAgentParsers.put(parserName, new UserAgentParser(parserName, regexStream, cache));
                    }
                }
            }
        }

        return Collections.unmodifiableMap(userAgentParsers);
    }
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.useragent;
import org.elasticsearch.common.cache.Cache;
import org.elasticsearch.common.cache.CacheBuilder;
import org.elasticsearch.ingest.useragent.UserAgentParser.Details;
import java.util.Objects;
/**
 * Cache of parsed user agent {@link Details}, keyed by the combination of the
 * parser name and the raw user agent string.
 */
class UserAgentCache {

    private final Cache<CompositeCacheKey, Details> cache;

    /**
     * @param cacheSize maximum weight handed to the underlying cache builder
     */
    UserAgentCache(long cacheSize) {
        CacheBuilder<CompositeCacheKey, Details> builder = CacheBuilder.builder();
        this.cache = builder.setMaximumWeight(cacheSize).build();
    }

    /** Looks up the details stored for the given parser / user agent pair. */
    public Details get(String parserName, String userAgent) {
        CompositeCacheKey key = new CompositeCacheKey(parserName, userAgent);
        return cache.get(key);
    }

    /** Stores the details for the given parser / user agent pair. */
    public void put(String parserName, String userAgent, Details details) {
        CompositeCacheKey key = new CompositeCacheKey(parserName, userAgent);
        cache.put(key, details);
    }

    /** Immutable two-part key: which parser produced the entry, and for which user agent string. */
    private static final class CompositeCacheKey {

        private final String parserName;
        private final String userAgent;

        CompositeCacheKey(String parserName, String userAgent) {
            this.parserName = parserName;
            this.userAgent = userAgent;
        }

        @Override
        public boolean equals(Object obj) {
            // instanceof is false for null, so no separate null check is needed
            if ((obj instanceof CompositeCacheKey) == false) {
                return false;
            }
            CompositeCacheKey other = (CompositeCacheKey) obj;
            return parserName.equals(other.parserName) && userAgent.equals(other.userAgent);
        }

        @Override
        public int hashCode() {
            return Objects.hash(parserName, userAgent);
        }
    }
}

View File

@ -0,0 +1,280 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.useragent;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Parses user agent strings using three ordered lists of regular expressions (user agent,
 * operating system, device) read from a YAML regex file with the top-level keys
 * {@code user_agent_parsers}, {@code os_parsers} and {@code device_parsers}.
 *
 * Results are stored in and served from the {@link UserAgentCache} passed to the constructor,
 * keyed by this parser's name and the user agent string.
 */
final class UserAgentParser {

    private final UserAgentCache cache;
    private final List<UserAgentSubpattern> uaPatterns = new ArrayList<>();
    private final List<UserAgentSubpattern> osPatterns = new ArrayList<>();
    private final List<UserAgentSubpattern> devicePatterns = new ArrayList<>();
    private final String name;

    /**
     * @param name name of this parser, used as part of the cache key
     * @param regexStream YAML content holding the regular expressions; fully consumed and closed here
     * @param cache cache used by {@link #parse(String)}
     * @throws ElasticsearchParseException if the stream cannot be read or is not a valid regex file
     */
    public UserAgentParser(String name, InputStream regexStream, UserAgentCache cache) {
        this.name = name;
        this.cache = cache;

        try {
            init(regexStream);
        } catch (IOException e) {
            throw new ElasticsearchParseException("error parsing regular expression file", e);
        }
    }

    /**
     * Reads the regex file and fills the three pattern lists.
     *
     * @throws IOException if reading the YAML content fails
     * @throws ElasticsearchParseException if the file is malformed or contains no patterns at all
     */
    private void init(InputStream regexStream) throws IOException {
        // try-with-resources: the YAML parser was previously never closed
        try (XContentParser yamlParser = XContentFactory.xContent(XContentType.YAML).createParser(regexStream)) {
            XContentParser.Token token = yamlParser.nextToken();

            if (token == XContentParser.Token.START_OBJECT) {
                token = yamlParser.nextToken();

                for (; token != null; token = yamlParser.nextToken()) {
                    if (token != XContentParser.Token.FIELD_NAME) {
                        continue;
                    }

                    String section = yamlParser.currentName();
                    if (section.equals("user_agent_parsers")) {
                        for (Map<String, String> map : readParserConfigurations(yamlParser)) {
                            uaPatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
                                    map.get("family_replacement"), map.get("v1_replacement"), map.get("v2_replacement"),
                                    map.get("v3_replacement"), map.get("v4_replacement")));
                        }
                    } else if (section.equals("os_parsers")) {
                        for (Map<String, String> map : readParserConfigurations(yamlParser)) {
                            osPatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
                                    map.get("os_replacement"), map.get("os_v1_replacement"), map.get("os_v2_replacement"),
                                    map.get("os_v3_replacement"), map.get("os_v4_replacement")));
                        }
                    } else if (section.equals("device_parsers")) {
                        // device patterns only yield a name, never version parts
                        for (Map<String, String> map : readParserConfigurations(yamlParser)) {
                            devicePatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
                                    map.get("device_replacement"), null, null, null, null));
                        }
                    }
                }
            }
        }

        if (uaPatterns.isEmpty() && osPatterns.isEmpty() && devicePatterns.isEmpty()) {
            throw new ElasticsearchParseException("not a valid regular expression file");
        }
    }

    /**
     * Compiles a single regular expression from the regex file.
     *
     * @param regex the regular expression; must be present
     * @param regex_flag optional flag; only {@code "i"} (case insensitive) is recognised
     * @throws ElasticsearchParseException if the entry has no regular expression
     */
    private Pattern compilePattern(String regex, String regex_flag) {
        if (regex == null) {
            // fail with a descriptive error instead of a NullPointerException from Pattern.compile
            throw new ElasticsearchParseException("malformed regular expression file, parser entry is missing 'regex'");
        }

        // Only flag present in the current default regexes.yaml
        if ("i".equals(regex_flag)) {
            return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
        } else {
            return Pattern.compile(regex);
        }
    }

    /**
     * Reads one section of the regex file: an array of objects whose fields are read as text.
     * The parser must be positioned on the section's field name.
     *
     * @return one map of field name to field text per array entry, in file order
     * @throws ElasticsearchParseException if the section does not have the expected structure
     */
    private List<Map<String, String>> readParserConfigurations(XContentParser yamlParser) throws IOException {
        List<Map<String, String>> patternList = new ArrayList<>();

        XContentParser.Token token = yamlParser.nextToken();
        if (token != XContentParser.Token.START_ARRAY) {
            throw new ElasticsearchParseException("malformed regular expression file, should continue with 'array' after 'object'");
        }

        token = yamlParser.nextToken();
        if (token != XContentParser.Token.START_OBJECT) {
            throw new ElasticsearchParseException("malformed regular expression file, expecting 'object'");
        }

        while (token == XContentParser.Token.START_OBJECT) {
            token = yamlParser.nextToken();

            if (token != XContentParser.Token.FIELD_NAME) {
                throw new ElasticsearchParseException("malformed regular expression file, should continue with 'field_name' after 'array'");
            }

            Map<String, String> regexMap = new HashMap<>();
            for (; token == XContentParser.Token.FIELD_NAME; token = yamlParser.nextToken()) {
                String fieldName = yamlParser.currentName();

                // advance to the value token; its text is the field value
                token = yamlParser.nextToken();
                String fieldValue = yamlParser.text();
                regexMap.put(fieldName, fieldValue);
            }

            patternList.add(regexMap);

            // move past the end of this entry: either the next START_OBJECT or the end of the array
            token = yamlParser.nextToken();
        }

        return patternList;
    }

    List<UserAgentSubpattern> getUaPatterns() {
        return uaPatterns;
    }

    List<UserAgentSubpattern> getOsPatterns() {
        return osPatterns;
    }

    List<UserAgentSubpattern> getDevicePatterns() {
        return devicePatterns;
    }

    String getName() {
        return name;
    }

    /**
     * Extracts user agent, operating system and device information from a user agent string,
     * consulting the cache first and storing freshly computed results in it.
     *
     * @param agentString the raw user agent string
     * @return the details; individual parts are {@code null} when no pattern matched
     */
    public Details parse(String agentString) {
        Details details = cache.get(name, agentString);

        if (details == null) {
            VersionedName userAgent = findMatch(uaPatterns, agentString);
            VersionedName operatingSystem = findMatch(osPatterns, agentString);
            VersionedName device = findMatch(devicePatterns, agentString);

            details = new Details(userAgent, operatingSystem, device);

            cache.put(name, agentString, details);
        }

        return details;
    }

    /**
     * Returns the result of the first pattern, in list order, that yields a match,
     * or {@code null} if none does.
     */
    private VersionedName findMatch(List<UserAgentSubpattern> possiblePatterns, String agentString) {
        for (UserAgentSubpattern pattern : possiblePatterns) {
            VersionedName match = pattern.match(agentString);
            if (match != null) {
                return match;
            }
        }

        return null;
    }

    /** Result of parsing one user agent string; each part may be {@code null}. */
    static final class Details {
        public final VersionedName userAgent;
        public final VersionedName operatingSystem;
        public final VersionedName device;

        public Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device) {
            this.userAgent = userAgent;
            this.operatingSystem = operatingSystem;
            this.device = device;
        }
    }

    /** A name together with up to four optional version components. */
    static final class VersionedName {
        public final String name;
        public final String major;
        public final String minor;
        public final String patch;
        public final String build;

        public VersionedName(String name, String major, String minor, String patch, String build) {
            this.name = name;
            this.major = major;
            this.minor = minor;
            this.patch = patch;
            this.build = build;
        }
    }

    /**
     * One of: user agent, operating system, device
     */
    static final class UserAgentSubpattern {
        private final Pattern pattern;
        private final String nameReplacement, v1Replacement, v2Replacement, v3Replacement, v4Replacement;

        /**
         * @param pattern regular expression to search for in the user agent string
         * @param nameReplacement fixed name, optionally containing {@code $1}; {@code null} to use capture group 1
         * @param v1Replacement fixed major version; {@code null} to use capture group 2
         * @param v2Replacement fixed minor version; {@code null} to use capture group 3
         * @param v3Replacement fixed patch version; {@code null} to use capture group 4
         * @param v4Replacement fixed build version; {@code null} to use capture group 5
         */
        public UserAgentSubpattern(Pattern pattern, String nameReplacement,
                String v1Replacement, String v2Replacement, String v3Replacement, String v4Replacement) {
            this.pattern = pattern;
            this.nameReplacement = nameReplacement;
            this.v1Replacement = v1Replacement;
            this.v2Replacement = v2Replacement;
            this.v3Replacement = v3Replacement;
            this.v4Replacement = v4Replacement;
        }

        /**
         * Searches the user agent string for this pattern.
         *
         * @return the extracted name and version parts, or {@code null} if the pattern is not
         *         found or no name could be determined
         */
        public VersionedName match(String agentString) {
            String name = null, major = null, minor = null, patch = null, build = null;
            Matcher matcher = pattern.matcher(agentString);

            if (!matcher.find()) {
                return null;
            }

            int groupCount = matcher.groupCount();

            if (nameReplacement != null) {
                if (nameReplacement.contains("$1") && groupCount >= 1 && matcher.group(1) != null) {
                    // quoteReplacement: the captured text must be inserted literally
                    name = nameReplacement.replaceFirst("\\$1", Matcher.quoteReplacement(matcher.group(1)));
                } else {
                    name = nameReplacement;
                }
            } else if (groupCount >= 1) {
                name = matcher.group(1);
            }

            // For each version part a configured replacement takes precedence over the capture group.
            if (v1Replacement != null) {
                major = v1Replacement;
            } else if (groupCount >= 2) {
                major = matcher.group(2);
            }

            if (v2Replacement != null) {
                minor = v2Replacement;
            } else if (groupCount >= 3) {
                minor = matcher.group(3);
            }

            if (v3Replacement != null) {
                patch = v3Replacement;
            } else if (groupCount >= 4) {
                patch = matcher.group(4);
            }

            if (v4Replacement != null) {
                build = v4Replacement;
            } else if (groupCount >= 5) {
                build = matcher.group(5);
            }

            return name == null ? null : new VersionedName(name, major, minor, patch, build);
        }
    }
}

View File

@ -0,0 +1,242 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.useragent;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.AbstractProcessorFactory;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.useragent.UserAgentParser.Details;
import org.elasticsearch.ingest.useragent.UserAgentParser.VersionedName;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalStringProperty;
/**
 * Ingest processor that reads a user agent string from one field of the document, parses it
 * with a {@link UserAgentParser} and writes the selected properties as a map into a target field.
 */
public class UserAgentProcessor extends AbstractProcessor {

    public static final String TYPE = "useragent";

    private final String field;
    private final String targetField;
    private final Set<Property> properties;
    private final UserAgentParser parser;

    public UserAgentProcessor(String tag, String field, String targetField, UserAgentParser parser, Set<Property> properties) {
        super(tag);
        this.field = field;
        this.targetField = targetField;
        this.parser = parser;
        this.properties = properties;
    }

    /**
     * Parses the user agent found in the source field and stores the configured properties
     * under the target field. Name-like properties fall back to {@code "Other"} when nothing
     * matched; version-like properties are simply omitted.
     */
    @Override
    public void execute(IngestDocument ingestDocument) throws Exception {
        final String agentString = ingestDocument.getFieldValue(field, String.class);
        final Details parsed = parser.parse(agentString);

        final Map<String, Object> extracted = new HashMap<>();
        for (Property property : this.properties) {
            final VersionedName agent = parsed.userAgent;
            final VersionedName os = parsed.operatingSystem;
            final VersionedName device = parsed.device;

            switch (property) {
                case NAME:
                    extracted.put("name", nameOrOther(agent));
                    break;
                case MAJOR:
                    putIfPresent(extracted, "major", agent == null ? null : agent.major);
                    break;
                case MINOR:
                    putIfPresent(extracted, "minor", agent == null ? null : agent.minor);
                    break;
                case PATCH:
                    putIfPresent(extracted, "patch", agent == null ? null : agent.patch);
                    break;
                case BUILD:
                    putIfPresent(extracted, "build", agent == null ? null : agent.build);
                    break;
                case OS:
                    extracted.put("os", os == null ? "Other" : buildFullOSName(os));
                    break;
                case OS_NAME:
                    extracted.put("os_name", nameOrOther(os));
                    break;
                case OS_MAJOR:
                    putIfPresent(extracted, "os_major", os == null ? null : os.major);
                    break;
                case OS_MINOR:
                    putIfPresent(extracted, "os_minor", os == null ? null : os.minor);
                    break;
                case DEVICE:
                    extracted.put("device", nameOrOther(device));
                    break;
            }
        }

        ingestDocument.setFieldValue(targetField, extracted);
    }

    /** Returns the name of the match, or {@code "Other"} when there is no match or no name. */
    private static String nameOrOther(VersionedName versionedName) {
        if (versionedName == null || versionedName.name == null) {
            return "Other";
        }
        return versionedName.name;
    }

    /** Adds the entry only when a value is available. */
    private static void putIfPresent(Map<String, Object> target, String key, String value) {
        if (value != null) {
            target.put(key, value);
        }
    }

    /** To maintain compatibility with logstash-filter-useragent */
    private String buildFullOSName(VersionedName operatingSystem) {
        if (operatingSystem == null || operatingSystem.name == null) {
            return null;
        }

        // Version parts are appended in order and stop at the first missing one:
        // "<name> <major>.<minor>.<patch>.<build>"
        final String[] versionParts = {
            operatingSystem.major, operatingSystem.minor, operatingSystem.patch, operatingSystem.build
        };

        final StringBuilder fullName = new StringBuilder(operatingSystem.name);
        String separator = " ";
        for (String part : versionParts) {
            if (part == null) {
                break;
            }
            fullName.append(separator).append(part);
            separator = ".";
        }
        return fullName.toString();
    }

    @Override
    public String getType() {
        return TYPE;
    }

    String getField() {
        return field;
    }

    String getTargetField() {
        return targetField;
    }

    Set<Property> getProperties() {
        return properties;
    }

    UserAgentParser getUaParser() {
        return parser;
    }

    /**
     * Creates {@link UserAgentProcessor} instances from pipeline configuration, resolving the
     * {@code regex_file} option against the parsers that were handed to the constructor.
     */
    public static final class Factory extends AbstractProcessorFactory<UserAgentProcessor> {

        private final Map<String, UserAgentParser> userAgentParsers;

        public Factory(Map<String, UserAgentParser> userAgentParsers) {
            this.userAgentParsers = userAgentParsers;
        }

        @Override
        public UserAgentProcessor doCreate(String processorTag, Map<String, Object> config) throws Exception {
            final String field = readStringProperty(TYPE, processorTag, config, "field");
            final String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "useragent");
            final String regexFilename =
                    readStringProperty(TYPE, processorTag, config, "regex_file", IngestUserAgentPlugin.DEFAULT_PARSER_NAME);
            final List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");

            final UserAgentParser parser = userAgentParsers.get(regexFilename);
            if (parser == null) {
                throw newConfigurationException(TYPE, processorTag,
                        "regex_file", "regex file [" + regexFilename + "] doesn't exist (has to exist at node startup)");
            }

            final Set<Property> properties;
            if (propertyNames == null) {
                // no explicit selection: emit every known property
                properties = EnumSet.allOf(Property.class);
            } else {
                properties = EnumSet.noneOf(Property.class);
                for (String propertyName : propertyNames) {
                    try {
                        properties.add(Property.parseProperty(propertyName));
                    } catch (IllegalArgumentException e) {
                        throw newConfigurationException(TYPE, processorTag, "properties", e.getMessage());
                    }
                }
            }

            return new UserAgentProcessor(processorTag, field, targetField, parser, properties);
        }
    }

    /** The properties that can be written to the target field. */
    enum Property {

        NAME, MAJOR, MINOR, PATCH, OS, OS_NAME, OS_MAJOR, OS_MINOR, DEVICE, BUILD;

        /**
         * Resolves a property by its case-insensitive name.
         *
         * @throws IllegalArgumentException listing the valid values if the name is unknown
         */
        public static Property parseProperty(String propertyName) {
            final String constantName = propertyName.toUpperCase(Locale.ROOT);
            try {
                return valueOf(constantName);
            } catch (IllegalArgumentException e) {
                throw new IllegalArgumentException("illegal property value [" + propertyName + "]. valid values are " +
                        Arrays.toString(values()));
            }
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,176 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.useragent;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.ingest.AbstractProcessorFactory;
import org.elasticsearch.test.ESTestCase;
import org.junit.BeforeClass;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
public class UserAgentProcessorFactoryTests extends ESTestCase {
private static Map<String, UserAgentParser> userAgentParsers;
private static String regexWithoutDevicesFilename = "regexes_without_devices.yaml";
private static Path userAgentConfigDir;
@BeforeClass
public static void createUserAgentParsers() throws IOException {
Path configDir = createTempDir();
userAgentConfigDir = configDir.resolve("ingest-useragent");
Files.createDirectories(userAgentConfigDir);
// Copy file, leaving out the device parsers at the end
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(UserAgentProcessor.class.getResourceAsStream("/regexes.yaml"), StandardCharsets.UTF_8));
BufferedWriter writer = Files.newBufferedWriter(userAgentConfigDir.resolve(regexWithoutDevicesFilename));) {
String line;
while ((line = reader.readLine()) != null) {
if (line.startsWith("device_parsers:")) {
break;
}
writer.write(line);
writer.newLine();
}
}
userAgentParsers = IngestUserAgentPlugin.createUserAgentParsers(userAgentConfigDir, new UserAgentCache(1000));
}
public void testBuildDefaults() throws Exception {
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
String processorTag = randomAsciiOfLength(10);
config.put(AbstractProcessorFactory.TAG_KEY, processorTag);
UserAgentProcessor processor = factory.create(config);
assertThat(processor.getTag(), equalTo(processorTag));
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getTargetField(), equalTo("useragent"));
assertThat(processor.getUaParser().getUaPatterns().size(), greaterThan(0));
assertThat(processor.getUaParser().getOsPatterns().size(), greaterThan(0));
assertThat(processor.getUaParser().getDevicePatterns().size(), greaterThan(0));
assertThat(processor.getProperties(), equalTo(EnumSet.allOf(UserAgentProcessor.Property.class)));
}
public void testBuildTargetField() throws Exception {
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("target_field", "_target_field");
UserAgentProcessor processor = factory.create(config);
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getTargetField(), equalTo("_target_field"));
}
public void testBuildRegexFile() throws Exception {
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("regex_file", regexWithoutDevicesFilename);
UserAgentProcessor processor = factory.create(config);
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getUaParser().getUaPatterns().size(), greaterThan(0));
assertThat(processor.getUaParser().getOsPatterns().size(), greaterThan(0));
assertThat(processor.getUaParser().getDevicePatterns().size(), equalTo(0));
}
public void testBuildNonExistingRegexFile() throws Exception {
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("regex_file", "does-not-exist.yaml");
ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(config));
assertThat(e.getMessage(), equalTo("[regex_file] regex file [does-not-exist.yaml] doesn't exist (has to exist at node startup)"));
}
public void testBuildFields() throws Exception {
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
Set<UserAgentProcessor.Property> properties = EnumSet.noneOf(UserAgentProcessor.Property.class);
List<String> fieldNames = new ArrayList<>();
int numFields = scaledRandomIntBetween(1, UserAgentProcessor.Property.values().length);
for (int i = 0; i < numFields; i++) {
UserAgentProcessor.Property property = UserAgentProcessor.Property.values()[i];
properties.add(property);
fieldNames.add(property.name().toLowerCase(Locale.ROOT));
}
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("properties", fieldNames);
UserAgentProcessor processor = factory.create(config);
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getProperties(), equalTo(properties));
}
public void testInvalidProperty() throws Exception {
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("properties", Collections.singletonList("invalid"));
ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(config));
assertThat(e.getMessage(), equalTo("[properties] illegal property value [invalid]. valid values are [NAME, MAJOR, MINOR, "
+ "PATCH, OS, OS_NAME, OS_MAJOR, OS_MINOR, DEVICE, BUILD]"));
}
/**
 * Passing a plain string instead of a list for {@code properties} must be
 * rejected with a message naming the offending type.
 */
public void testInvalidPropertiesType() throws Exception {
    Map<String, Object> settings = new HashMap<>();
    settings.put("field", "_field");
    settings.put("properties", "invalid");
    UserAgentProcessor.Factory processorFactory = new UserAgentProcessor.Factory(userAgentParsers);

    ElasticsearchParseException failure =
        expectThrows(ElasticsearchParseException.class, () -> processorFactory.create(settings));

    String expectedMessage = "[properties] property isn't a list, but of type [java.lang.String]";
    assertThat(failure.getMessage(), equalTo(expectedMessage));
}
}

View File

@ -0,0 +1,161 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.useragent;
import org.elasticsearch.ingest.RandomDocumentPicks;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.useragent.UserAgentProcessor;
import org.elasticsearch.test.ESTestCase;
import org.junit.BeforeClass;
import java.io.IOException;
import java.io.InputStream;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import static org.hamcrest.Matchers.hasKey;
import static org.hamcrest.Matchers.is;
/**
 * Tests for {@link UserAgentProcessor}: each test feeds one user agent string
 * through a processor configured with every {@link UserAgentProcessor.Property}
 * and checks the values written to the target field.
 */
public class UserAgentProcessorTests extends ESTestCase {

    /** Processor shared by all tests; reads {@code source_field} and writes {@code target_field}. */
    private static UserAgentProcessor processor;

    /**
     * Builds the shared processor from the {@code /regexes.yaml} classpath resource.
     *
     * @throws IOException if the resource stream cannot be read or closed
     */
    @BeforeClass
    public static void setupProcessor() throws IOException {
        // try-with-resources releases the classpath stream; previously it was never closed.
        // NOTE(review): assumes UserAgentParser reads the stream completely inside its constructor - confirm.
        try (InputStream regexStream = UserAgentProcessor.class.getResourceAsStream("/regexes.yaml")) {
            assertNotNull(regexStream);

            UserAgentParser parser = new UserAgentParser(randomAsciiOfLength(10), regexStream, new UserAgentCache(1000));

            processor = new UserAgentProcessor(randomAsciiOfLength(10), "source_field", "target_field", parser,
                EnumSet.allOf(UserAgentProcessor.Property.class));
        }
    }

    /**
     * Runs the shared processor on a document whose {@code source_field} holds
     * the given user agent and returns the map stored under {@code target_field}.
     *
     * @param userAgent the raw user agent string to process
     * @return the value found under {@code target_field} after processing
     * @throws Exception if the processor fails
     */
    @SuppressWarnings("unchecked")
    private Map<String, Object> processUserAgent(String userAgent) throws Exception {
        Map<String, Object> document = new HashMap<>();
        document.put("source_field", userAgent);
        IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);

        processor.execute(ingestDocument);

        Map<String, Object> data = ingestDocument.getSourceAndMetadata();
        assertThat(data, hasKey("target_field"));
        return (Map<String, Object>) data.get("target_field");
    }

    /** A desktop Chrome user agent yields browser and OS versions and the device {@code Other}. */
    public void testCommonBrowser() throws Exception {
        Map<String, Object> target = processUserAgent(
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36");

        assertThat(target.get("name"), is("Chrome"));
        assertThat(target.get("major"), is("33"));
        assertThat(target.get("minor"), is("0"));
        assertThat(target.get("patch"), is("1750"));
        assertNull(target.get("build"));

        assertThat(target.get("os"), is("Mac OS X 10.9.2"));
        assertThat(target.get("os_name"), is("Mac OS X"));
        assertThat(target.get("os_major"), is("10"));
        assertThat(target.get("os_minor"), is("9"));

        assertThat(target.get("device"), is("Other"));
    }

    /** An Android tablet user agent yields a specific device name. */
    public void testUncommonDevice() throws Exception {
        Map<String, Object> target = processUserAgent(
            "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10+ "
                + "(KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2");

        assertThat(target.get("name"), is("Android"));
        assertThat(target.get("major"), is("3"));
        assertThat(target.get("minor"), is("0"));
        assertNull(target.get("patch"));
        assertNull(target.get("build"));

        assertThat(target.get("os"), is("Android 3.0"));
        assertThat(target.get("os_name"), is("Android"));
        assertThat(target.get("os_major"), is("3"));
        assertThat(target.get("os_minor"), is("0"));

        assertThat(target.get("device"), is("Motorola Xoom"));
    }

    /** A crawler user agent yields its name, no versions, and the device {@code Spider}. */
    public void testSpider() throws Exception {
        Map<String, Object> target = processUserAgent(
            "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)");

        assertThat(target.get("name"), is("EasouSpider"));
        assertNull(target.get("major"));
        assertNull(target.get("minor"));
        assertNull(target.get("patch"));
        assertNull(target.get("build"));

        assertThat(target.get("os"), is("Other"));
        assertThat(target.get("os_name"), is("Other"));
        assertNull(target.get("os_major"));
        assertNull(target.get("os_minor"));

        assertThat(target.get("device"), is("Spider"));
    }

    /** An unrecognised user agent falls back to {@code Other} everywhere, with no versions. */
    public void testUnknown() throws Exception {
        Map<String, Object> target = processUserAgent(
            "Something I made up v42.0.1");

        assertThat(target.get("name"), is("Other"));
        assertNull(target.get("major"));
        assertNull(target.get("minor"));
        assertNull(target.get("patch"));
        assertNull(target.get("build"));

        assertThat(target.get("os"), is("Other"));
        assertThat(target.get("os_name"), is("Other"));
        assertNull(target.get("os_major"));
        assertNull(target.get("os_minor"));

        assertThat(target.get("device"), is("Other"));
    }
}

View File

@ -0,0 +1,40 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.useragent;
import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.elasticsearch.test.rest.RestTestCandidate;
import org.elasticsearch.test.rest.parser.RestTestParseException;
import java.io.IOException;
/**
 * Parameterized REST test entry point for this plugin: every
 * {@link RestTestCandidate} produced by {@link #parameters()} is handed to
 * {@link ESRestTestCase} through the constructor.
 */
public class UserAgentRestIT extends ESRestTestCase {

    /**
     * @param testCandidate the candidate to run, passed straight to the superclass
     */
    public UserAgentRestIT(@Name("yaml") RestTestCandidate testCandidate) {
        super(testCandidate);
    }

    /**
     * @return the constructor arguments produced by {@code ESRestTestCase.createParameters(0, 1)}
     */
    @ParametersFactory
    public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
        Iterable<Object[]> candidates = ESRestTestCase.createParameters(0, 1);
        return candidates;
    }
}

View File

@ -0,0 +1,11 @@
# Checks that the master node reports the ingest-useragent plugin and the useragent processor type.
"ingest-useragent plugin installed":
- do:
cluster.state: {}
# Keep the master_node value of the cluster state response under the name `master` for the lookups below.
- set: {master_node: master}
- do:
nodes.info: {}
# Both assertions look at index 0 of the respective list in the master node's info.
- match: { nodes.$master.plugins.0.name: ingest-useragent }
- match: { nodes.$master.ingest.processors.0.type: useragent }

View File

@ -0,0 +1,86 @@
---
# Only `field` is configured here; the assertions expect the parsed details under `useragent`.
"Test user agent processor with defaults":
# Register a pipeline with a single useragent processor reading `field1`.
- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"useragent" : {
"field" : "field1"
}
}
]
}
- match: { acknowledged: true }
# Index one document through the pipeline; `field1` holds a Chrome-on-Mac user agent string.
- do:
index:
index: test
type: test
id: 1
pipeline: "my_pipeline"
body: {field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36"}
# Fetch the document back and check the stored source.
- do:
get:
index: test
type: test
id: 1
# The original field must be unchanged and the extracted details present under `useragent`.
- match: { _source.field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36" }
- match: { _source.useragent.name: "Chrome" }
- match: { _source.useragent.os: "Mac OS X 10.9.2" }
- match: { _source.useragent.os_name: "Mac OS X" }
- match: { _source.useragent.os_major: "10" }
- match: { _source.useragent.os_minor: "9" }
- match: { _source.useragent.major: "33" }
- match: { _source.useragent.minor: "0" }
- match: { _source.useragent.patch: "1750" }
- match: { _source.useragent.device: "Other" }
---
# Configures `target_field` and restricts `properties` to "os"; only `field2.os` is expected afterwards.
"Test user agent processor with parameters":
# Register a pipeline whose useragent processor writes to `field2` and requests the `os` property only.
- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"useragent" : {
"field" : "field1",
"target_field": "field2",
"properties": ["os"]
}
}
]
}
- match: { acknowledged: true }
# Index one document through the pipeline; `field1` holds a Chrome-on-Mac user agent string.
- do:
index:
index: test
type: test
id: 1
pipeline: "my_pipeline"
body: {field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36"}
# Fetch the document back and check the stored source.
- do:
get:
index: test
type: test
id: 1
- match: { _source.field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36" }
- match: { _source.field2.os: "Mac OS X 10.9.2" }
# Nothing may appear under `useragent`, and no property other than `os` under `field2`.
- is_false: _source.useragent
- is_false: _source.field2.name
- is_false: _source.field2.os_name
- is_false: _source.field2.os_major
- is_false: _source.field2.os_minor
- is_false: _source.field2.major
- is_false: _source.field2.minor
- is_false: _source.field2.patch
- is_false: _source.field2.device

View File

@ -0,0 +1,42 @@
---
# Points the processor at `test-regexes.yaml` through `regex_file` and checks the resulting fields.
"Test user agent processor with custom regex file":
# Register a pipeline whose useragent processor names a custom regex file.
- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"useragent" : {
"field": "field1",
"regex_file": "test-regexes.yaml"
}
}
]
}
- match: { acknowledged: true }
# Index one document through the pipeline; `field1` holds a Chrome-on-Mac user agent string.
- do:
index:
index: test
type: test
id: 1
pipeline: "my_pipeline"
body: {field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36"}
# Fetch the document back and check the stored source.
- do:
get:
index: test
type: test
id: 1
# Expected outcome with the custom file: name "Test", OS and device "Other", and no version fields.
- match: { _source.field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36" }
- match: { _source.useragent.name: "Test" }
- match: { _source.useragent.os: "Other" }
- match: { _source.useragent.os_name: "Other" }
- match: { _source.useragent.device: "Other" }
- is_false: _source.useragent.os_major
- is_false: _source.useragent.os_minor
- is_false: _source.useragent.major
- is_false: _source.useragent.minor
- is_false: _source.useragent.patch

View File

@ -0,0 +1,3 @@
# Test fixture with a single user agent pattern: '.*' with the family replacement 'Test'.
# No OS or device sections are defined in this file.
user_agent_parsers:
- regex: '.*'
family_replacement: 'Test'

View File

@ -36,6 +36,7 @@ List projects = [
'plugins:discovery-gce',
'plugins:ingest-geoip',
'plugins:ingest-attachment',
'plugins:ingest-useragent',
'plugins:lang-javascript',
'plugins:lang-python',
'plugins:mapper-attachments',