mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 02:14:54 +00:00
Add ingest-useragent plugin (#19074)
This commit is contained in:
parent
27e320d5ce
commit
42addb5692
74
docs/plugins/ingest-useragent.asciidoc
Normal file
74
docs/plugins/ingest-useragent.asciidoc
Normal file
@ -0,0 +1,74 @@
|
||||
[[ingest-useragent]]
|
||||
=== Ingest Useragent Processor Plugin
|
||||
|
||||
The Useragent processor extracts details from the user agent string a browser sends with its web requests.
|
||||
This processor adds this information by default under the `useragent` field.
|
||||
|
||||
The ingest-useragent plugin ships by default with the `regexes.yaml` made available by uap-core under an Apache 2.0 license. For more details see https://github.com/ua-parser/uap-core.
|
||||
|
||||
[[ingest-useragent-install]]
|
||||
[float]
|
||||
==== Installation
|
||||
|
||||
This plugin can be installed using the plugin manager:
|
||||
|
||||
[source,sh]
|
||||
----------------------------------------------------------------
|
||||
sudo bin/elasticsearch-plugin install ingest-useragent
|
||||
----------------------------------------------------------------
|
||||
|
||||
The plugin must be installed on every node in the cluster, and each node must
|
||||
be restarted after installation.
|
||||
|
||||
[[ingest-useragent-remove]]
|
||||
[float]
|
||||
==== Removal
|
||||
|
||||
The plugin can be removed with the following command:
|
||||
|
||||
[source,sh]
|
||||
----------------------------------------------------------------
|
||||
sudo bin/elasticsearch-plugin remove ingest-useragent
|
||||
----------------------------------------------------------------
|
||||
|
||||
The node must be stopped before removing the plugin.
|
||||
|
||||
[[using-ingest-useragent]]
|
||||
==== Using the Useragent Processor in a Pipeline
|
||||
|
||||
[[ingest-useragent-options]]
|
||||
.Useragent options
|
||||
[options="header"]
|
||||
|======
|
||||
| Name | Required | Default | Description
|
||||
| `field` | yes | - | The field containing the user agent string.
|
||||
| `target_field` | no | useragent | The field that will be filled with the user agent details.
|
||||
| `regex_file` | no | - | The name of the file in the `config/ingest-useragent` directory containing the regular expressions for parsing the user agent string. Both the directory and the file have to be created before starting Elasticsearch. If not specified, ingest-useragent will use the regexes.yaml from uap-core it ships with (see below).
|
||||
| `properties` | no | [`name`, `major`, `minor`, `patch`, `build`, `os`, `os_name`, `os_major`, `os_minor`, `device`] | Controls what properties are added to `target_field`.
|
||||
|======
|
||||
|
||||
Here is an example that adds the user agent details to the `useragent` field based on the `agent` field:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"description" : "...",
|
||||
"processors" : [
|
||||
{
|
||||
"useragent" : {
|
||||
"field" : "agent"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
===== Using a custom regex file
|
||||
To use a custom regex file for parsing the user agents, that file has to be put into the `config/ingest-useragent` directory and
|
||||
has to have a `.yaml` filename extension. The file has to be present at node startup; any changes to it or any new files added
|
||||
while the node is running will not have any effect.
|
||||
|
||||
In practice, it will make most sense for any custom regex file to be a variant of the default file, either a more recent version
|
||||
or a customised version.
|
||||
|
||||
The default file included in `ingest-useragent` is the `regexes.yaml` from uap-core: https://github.com/ua-parser/uap-core/blob/master/regexes.yaml
|
29
plugins/ingest-useragent/build.gradle
Normal file
29
plugins/ingest-useragent/build.gradle
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
// Plugin descriptor: the human-readable description and the fully-qualified name of the plugin class.
esplugin {
|
||||
description 'Ingest processor that extracts information from a user agent'
|
||||
classname 'org.elasticsearch.ingest.useragent.IngestUserAgentPlugin'
|
||||
}
|
||||
|
||||
// Integration-test configuration for this plugin.
integTest {
|
||||
cluster {
|
||||
// NOTE(review): presumably installs test/test-regexes.yaml into the test cluster's config directory as
// ingest-useragent/test-regexes.yaml, i.e. where the plugin looks for custom regex files - confirm against
// the build plugin's extraConfigFile documentation.
extraConfigFile 'ingest-useragent/test-regexes.yaml', 'test/test-regexes.yaml'
|
||||
}
|
||||
}
|
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.ingest.useragent;
|
||||
|
||||
import org.elasticsearch.common.settings.Setting;
|
||||
import org.elasticsearch.node.NodeModule;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.PathMatcher;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class IngestUserAgentPlugin extends Plugin {
|
||||
|
||||
private final Setting<Long> CACHE_SIZE_SETTING = Setting.longSetting("ingest.useragent.cache_size", 1000, 0,
|
||||
Setting.Property.NodeScope);
|
||||
|
||||
static final String DEFAULT_PARSER_NAME = "_default_";
|
||||
|
||||
public void onModule(NodeModule nodeModule) throws IOException {
|
||||
Path userAgentConfigDirectory = nodeModule.getNode().getEnvironment().configFile().resolve("ingest-useragent");
|
||||
|
||||
if (Files.exists(userAgentConfigDirectory) == false && Files.isDirectory(userAgentConfigDirectory)) {
|
||||
throw new IllegalStateException(
|
||||
"the user agent directory [" + userAgentConfigDirectory + "] containing the regex file doesn't exist");
|
||||
}
|
||||
|
||||
long cacheSize = CACHE_SIZE_SETTING.get(nodeModule.getNode().settings());
|
||||
|
||||
UserAgentCache cache = new UserAgentCache(cacheSize);
|
||||
|
||||
Map<String, UserAgentParser> userAgentParsers = createUserAgentParsers(userAgentConfigDirectory, cache);
|
||||
|
||||
nodeModule.registerProcessor(UserAgentProcessor.TYPE, (registry) -> new UserAgentProcessor.Factory(userAgentParsers));
|
||||
}
|
||||
|
||||
static Map<String, UserAgentParser> createUserAgentParsers(Path userAgentConfigDirectory, UserAgentCache cache) throws IOException {
|
||||
Map<String, UserAgentParser> userAgentParsers = new HashMap<>();
|
||||
|
||||
UserAgentParser defaultParser = new UserAgentParser(DEFAULT_PARSER_NAME,
|
||||
IngestUserAgentPlugin.class.getResourceAsStream("/regexes.yaml"), cache);
|
||||
userAgentParsers.put(DEFAULT_PARSER_NAME, defaultParser);
|
||||
|
||||
if (Files.exists(userAgentConfigDirectory) && Files.isDirectory(userAgentConfigDirectory)) {
|
||||
PathMatcher pathMatcher = userAgentConfigDirectory.getFileSystem().getPathMatcher("glob:**.yaml");
|
||||
|
||||
try (Stream<Path> regexFiles = Files.find(userAgentConfigDirectory, 1,
|
||||
(path, attr) -> attr.isRegularFile() && pathMatcher.matches(path))) {
|
||||
Iterable<Path> iterable = regexFiles::iterator;
|
||||
for (Path path : iterable) {
|
||||
String parserName = path.getFileName().toString();
|
||||
try (InputStream regexStream = Files.newInputStream(path, StandardOpenOption.READ)) {
|
||||
userAgentParsers.put(parserName, new UserAgentParser(parserName, regexStream, cache));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Collections.unmodifiableMap(userAgentParsers);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.ingest.useragent;
|
||||
|
||||
import org.elasticsearch.common.cache.Cache;
|
||||
import org.elasticsearch.common.cache.CacheBuilder;
|
||||
import org.elasticsearch.ingest.useragent.UserAgentParser.Details;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
class UserAgentCache {
|
||||
private final Cache<CompositeCacheKey, Details> cache;
|
||||
|
||||
UserAgentCache(long cacheSize) {
|
||||
cache = CacheBuilder.<CompositeCacheKey, Details>builder().setMaximumWeight(cacheSize).build();
|
||||
}
|
||||
|
||||
public Details get(String parserName, String userAgent) {
|
||||
return cache.get(new CompositeCacheKey(parserName, userAgent));
|
||||
}
|
||||
|
||||
public void put(String parserName, String userAgent, Details details) {
|
||||
cache.put(new CompositeCacheKey(parserName, userAgent), details);
|
||||
}
|
||||
|
||||
private static final class CompositeCacheKey {
|
||||
private final String parserName;
|
||||
private final String userAgent;
|
||||
|
||||
CompositeCacheKey(String parserName, String userAgent) {
|
||||
this.parserName = parserName;
|
||||
this.userAgent = userAgent;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if(obj != null && obj instanceof CompositeCacheKey) {
|
||||
CompositeCacheKey s = (CompositeCacheKey)obj;
|
||||
return parserName.equals(s.parserName) && userAgent.equals(s.userAgent);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(parserName, userAgent);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,280 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.ingest.useragent;
|
||||
|
||||
import org.elasticsearch.ElasticsearchParseException;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
final class UserAgentParser {
|
||||
|
||||
private final UserAgentCache cache;
|
||||
private final List<UserAgentSubpattern> uaPatterns = new ArrayList<>();
|
||||
private final List<UserAgentSubpattern> osPatterns = new ArrayList<>();
|
||||
private final List<UserAgentSubpattern> devicePatterns = new ArrayList<>();
|
||||
private final String name;
|
||||
|
||||
/**
 * Creates a parser and immediately loads its patterns from the given stream.
 *
 * @param name        name of this parser; also passed to the cache as part of the lookup key
 * @param regexStream stream with the YAML regular expression definitions
 * @param cache       cache consulted and filled by {@link #parse(String)}
 * @throws ElasticsearchParseException if reading the stream fails with an {@link IOException},
 *                                     or if the content is rejected while loading
 */
public UserAgentParser(String name, InputStream regexStream, UserAgentCache cache) {
    this.cache = cache;
    this.name = name;

    try {
        init(regexStream);
    } catch (IOException ioe) {
        throw new ElasticsearchParseException("error parsing regular expression file", ioe);
    }
}
|
||||
|
||||
private void init(InputStream regexStream) throws IOException {
|
||||
XContentParser yamlParser = XContentFactory.xContent(XContentType.YAML).createParser(regexStream);
|
||||
|
||||
XContentParser.Token token = yamlParser.nextToken();
|
||||
|
||||
if (token == XContentParser.Token.START_OBJECT) {
|
||||
token = yamlParser.nextToken();
|
||||
|
||||
for (; token != null; token = yamlParser.nextToken()) {
|
||||
if (token == XContentParser.Token.FIELD_NAME && yamlParser.currentName().equals("user_agent_parsers")) {
|
||||
List<Map<String, String>> parserConfigurations = readParserConfigurations(yamlParser);
|
||||
|
||||
for (Map<String, String> map : parserConfigurations) {
|
||||
uaPatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
|
||||
map.get("family_replacement"), map.get("v1_replacement"), map.get("v2_replacement"),
|
||||
map.get("v3_replacement"), map.get("v4_replacement")));
|
||||
}
|
||||
}
|
||||
else if (token == XContentParser.Token.FIELD_NAME && yamlParser.currentName().equals("os_parsers")) {
|
||||
List<Map<String, String>> parserConfigurations = readParserConfigurations(yamlParser);
|
||||
|
||||
for (Map<String, String> map : parserConfigurations) {
|
||||
osPatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
|
||||
map.get("os_replacement"), map.get("os_v1_replacement"), map.get("os_v2_replacement"),
|
||||
map.get("os_v3_replacement"), map.get("os_v4_replacement")));
|
||||
}
|
||||
}
|
||||
else if (token == XContentParser.Token.FIELD_NAME && yamlParser.currentName().equals("device_parsers")) {
|
||||
List<Map<String, String>> parserConfigurations = readParserConfigurations(yamlParser);
|
||||
|
||||
for (Map<String, String> map : parserConfigurations) {
|
||||
devicePatterns.add(new UserAgentSubpattern(compilePattern(map.get("regex"), map.get("regex_flag")),
|
||||
map.get("device_replacement"), null, null, null, null));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (uaPatterns.isEmpty() && osPatterns.isEmpty() && devicePatterns.isEmpty()) {
|
||||
throw new ElasticsearchParseException("not a valid regular expression file");
|
||||
}
|
||||
}
|
||||
|
||||
private Pattern compilePattern(String regex, String regex_flag) {
|
||||
// Only flag present in the current default regexes.yaml
|
||||
if (regex_flag != null && regex_flag.equals("i")) {
|
||||
return Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
|
||||
} else {
|
||||
return Pattern.compile(regex);
|
||||
}
|
||||
}
|
||||
|
||||
private List<Map<String, String>> readParserConfigurations(XContentParser yamlParser) throws IOException {
|
||||
List <Map<String, String>> patternList = new ArrayList<>();
|
||||
|
||||
XContentParser.Token token = yamlParser.nextToken();
|
||||
if (token != XContentParser.Token.START_ARRAY) {
|
||||
throw new ElasticsearchParseException("malformed regular expression file, should continue with 'array' after 'object'");
|
||||
}
|
||||
|
||||
token = yamlParser.nextToken();
|
||||
if (token != XContentParser.Token.START_OBJECT) {
|
||||
throw new ElasticsearchParseException("malformed regular expression file, expecting 'object'");
|
||||
}
|
||||
|
||||
while (token == XContentParser.Token.START_OBJECT) {
|
||||
token = yamlParser.nextToken();
|
||||
|
||||
if (token != XContentParser.Token.FIELD_NAME) {
|
||||
throw new ElasticsearchParseException("malformed regular expression file, should continue with 'field_name' after 'array'");
|
||||
}
|
||||
|
||||
Map<String, String> regexMap = new HashMap<>();
|
||||
for (; token == XContentParser.Token.FIELD_NAME; token = yamlParser.nextToken()) {
|
||||
String fieldName = yamlParser.currentName();
|
||||
|
||||
token = yamlParser.nextToken();
|
||||
String fieldValue = yamlParser.text();
|
||||
regexMap.put(fieldName, fieldValue);
|
||||
}
|
||||
|
||||
patternList.add(regexMap);
|
||||
|
||||
token = yamlParser.nextToken();
|
||||
}
|
||||
|
||||
return patternList;
|
||||
}
|
||||
|
||||
List<UserAgentSubpattern> getUaPatterns() {
|
||||
return uaPatterns;
|
||||
}
|
||||
|
||||
List<UserAgentSubpattern> getOsPatterns() {
|
||||
return osPatterns;
|
||||
}
|
||||
|
||||
List<UserAgentSubpattern> getDevicePatterns() {
|
||||
return devicePatterns;
|
||||
}
|
||||
|
||||
String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public Details parse(String agentString) {
|
||||
Details details = cache.get(name, agentString);;
|
||||
|
||||
if (details == null) {
|
||||
VersionedName userAgent = findMatch(uaPatterns, agentString);
|
||||
VersionedName operatingSystem = findMatch(osPatterns, agentString);
|
||||
VersionedName device = findMatch(devicePatterns, agentString);
|
||||
|
||||
details = new Details(userAgent, operatingSystem, device);
|
||||
|
||||
cache.put(name, agentString, details);
|
||||
}
|
||||
|
||||
return details;
|
||||
}
|
||||
|
||||
private VersionedName findMatch(List<UserAgentSubpattern> possiblePatterns, String agentString) {
|
||||
VersionedName name;
|
||||
for (UserAgentSubpattern pattern : possiblePatterns) {
|
||||
name = pattern.match(agentString);
|
||||
|
||||
if (name != null) {
|
||||
return name;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
static final class Details {
|
||||
public final VersionedName userAgent;
|
||||
public final VersionedName operatingSystem;
|
||||
public final VersionedName device;
|
||||
|
||||
public Details(VersionedName userAgent, VersionedName operatingSystem, VersionedName device) {
|
||||
this.userAgent = userAgent;
|
||||
this.operatingSystem = operatingSystem;
|
||||
this.device = device;
|
||||
}
|
||||
}
|
||||
|
||||
static final class VersionedName {
|
||||
public final String name;
|
||||
public final String major;
|
||||
public final String minor;
|
||||
public final String patch;
|
||||
public final String build;
|
||||
|
||||
public VersionedName(String name, String major, String minor, String patch, String build) {
|
||||
this.name = name;
|
||||
this.major = major;
|
||||
this.minor = minor;
|
||||
this.patch = patch;
|
||||
this.build = build;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* One of: user agent, operating system, device
|
||||
*/
|
||||
static final class UserAgentSubpattern {
|
||||
private final Pattern pattern;
|
||||
private final String nameReplacement, v1Replacement, v2Replacement, v3Replacement, v4Replacement;
|
||||
|
||||
public UserAgentSubpattern(Pattern pattern, String nameReplacement,
|
||||
String v1Replacement, String v2Replacement, String v3Replacement, String v4Replacement) {
|
||||
this.pattern = pattern;
|
||||
this.nameReplacement = nameReplacement;
|
||||
this.v1Replacement = v1Replacement;
|
||||
this.v2Replacement = v2Replacement;
|
||||
this.v3Replacement = v3Replacement;
|
||||
this.v4Replacement = v4Replacement;
|
||||
}
|
||||
|
||||
public VersionedName match(String agentString) {
|
||||
String name = null, major = null, minor = null, patch = null, build = null;
|
||||
Matcher matcher = pattern.matcher(agentString);
|
||||
|
||||
if (!matcher.find()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int groupCount = matcher.groupCount();
|
||||
|
||||
if (nameReplacement != null) {
|
||||
if (nameReplacement.contains("$1") && groupCount >= 1 && matcher.group(1) != null) {
|
||||
name = nameReplacement.replaceFirst("\\$1", Matcher.quoteReplacement(matcher.group(1)));
|
||||
} else {
|
||||
name = nameReplacement;
|
||||
}
|
||||
} else if (groupCount >= 1) {
|
||||
name = matcher.group(1);
|
||||
}
|
||||
|
||||
if (v1Replacement != null) {
|
||||
major = v1Replacement;
|
||||
} else if (groupCount >= 2) {
|
||||
major = matcher.group(2);
|
||||
}
|
||||
|
||||
if (v2Replacement != null) {
|
||||
minor = v2Replacement;
|
||||
} else if (groupCount >= 3) {
|
||||
minor = matcher.group(3);
|
||||
}
|
||||
|
||||
if (v3Replacement != null) {
|
||||
patch = v3Replacement;
|
||||
} else if (groupCount >= 4) {
|
||||
patch = matcher.group(4);
|
||||
}
|
||||
|
||||
if (v4Replacement != null) {
|
||||
build = v4Replacement;
|
||||
} else if (groupCount >= 5) {
|
||||
build = matcher.group(5);
|
||||
}
|
||||
|
||||
return name == null ? null : new VersionedName(name, major, minor, patch, build);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,242 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.ingest.useragent;
|
||||
|
||||
import org.elasticsearch.ingest.AbstractProcessor;
|
||||
import org.elasticsearch.ingest.AbstractProcessorFactory;
|
||||
import org.elasticsearch.ingest.IngestDocument;
|
||||
import org.elasticsearch.ingest.useragent.UserAgentParser.Details;
|
||||
import org.elasticsearch.ingest.useragent.UserAgentParser.VersionedName;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
|
||||
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalList;
|
||||
import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
|
||||
import static org.elasticsearch.ingest.ConfigurationUtils.readOptionalStringProperty;
|
||||
|
||||
public class UserAgentProcessor extends AbstractProcessor {
|
||||
|
||||
public static final String TYPE = "useragent";
|
||||
|
||||
private final String field;
|
||||
private final String targetField;
|
||||
private final Set<Property> properties;
|
||||
|
||||
private final UserAgentParser parser;
|
||||
|
||||
/**
 * @param tag         processor tag, passed to the superclass
 * @param field       document field holding the raw user agent string
 * @param targetField document field that receives the extracted details
 * @param parser      parser used to analyse the user agent string
 * @param properties  the details to write into {@code targetField}
 */
public UserAgentProcessor(String tag, String field, String targetField, UserAgentParser parser, Set<Property> properties) {
    super(tag);
    this.parser = parser;
    this.properties = properties;
    this.targetField = targetField;
    this.field = field;
}
|
||||
|
||||
@Override
|
||||
public void execute(IngestDocument ingestDocument) throws Exception {
|
||||
String userAgent = ingestDocument.getFieldValue(field, String.class);
|
||||
|
||||
Details uaClient = parser.parse(userAgent);
|
||||
|
||||
Map<String, Object> uaDetails = new HashMap<>();
|
||||
for (Property property : this.properties) {
|
||||
switch (property) {
|
||||
case NAME:
|
||||
if (uaClient.userAgent != null && uaClient.userAgent.name != null) {
|
||||
uaDetails.put("name", uaClient.userAgent.name);
|
||||
}
|
||||
else {
|
||||
uaDetails.put("name", "Other");
|
||||
}
|
||||
break;
|
||||
case MAJOR:
|
||||
if (uaClient.userAgent != null && uaClient.userAgent.major != null) {
|
||||
uaDetails.put("major", uaClient.userAgent.major);
|
||||
}
|
||||
break;
|
||||
case MINOR:
|
||||
if (uaClient.userAgent != null && uaClient.userAgent.minor != null) {
|
||||
uaDetails.put("minor", uaClient.userAgent.minor);
|
||||
}
|
||||
break;
|
||||
case PATCH:
|
||||
if (uaClient.userAgent != null && uaClient.userAgent.patch != null) {
|
||||
uaDetails.put("patch", uaClient.userAgent.patch);
|
||||
}
|
||||
break;
|
||||
case BUILD:
|
||||
if (uaClient.userAgent != null && uaClient.userAgent.build != null) {
|
||||
uaDetails.put("build", uaClient.userAgent.build);
|
||||
}
|
||||
break;
|
||||
case OS:
|
||||
if (uaClient.operatingSystem != null) {
|
||||
uaDetails.put("os", buildFullOSName(uaClient.operatingSystem));
|
||||
}
|
||||
else {
|
||||
uaDetails.put("os", "Other");
|
||||
}
|
||||
|
||||
break;
|
||||
case OS_NAME:
|
||||
if (uaClient.operatingSystem != null && uaClient.operatingSystem.name != null) {
|
||||
uaDetails.put("os_name", uaClient.operatingSystem.name);
|
||||
}
|
||||
else {
|
||||
uaDetails.put("os_name", "Other");
|
||||
}
|
||||
break;
|
||||
case OS_MAJOR:
|
||||
if (uaClient.operatingSystem != null && uaClient.operatingSystem.major != null) {
|
||||
uaDetails.put("os_major", uaClient.operatingSystem.major);
|
||||
}
|
||||
break;
|
||||
case OS_MINOR:
|
||||
if (uaClient.operatingSystem != null && uaClient.operatingSystem.minor != null) {
|
||||
uaDetails.put("os_minor", uaClient.operatingSystem.minor);
|
||||
}
|
||||
break;
|
||||
case DEVICE:
|
||||
if (uaClient.device != null && uaClient.device.name != null) {
|
||||
uaDetails.put("device", uaClient.device.name);
|
||||
}
|
||||
else {
|
||||
uaDetails.put("device", "Other");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ingestDocument.setFieldValue(targetField, uaDetails);
|
||||
}
|
||||
|
||||
/** To maintain compatibility with logstash-filter-useragent */
|
||||
private String buildFullOSName(VersionedName operatingSystem) {
|
||||
if (operatingSystem == null || operatingSystem.name == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
StringBuilder sb = new StringBuilder(operatingSystem.name);
|
||||
|
||||
if (operatingSystem.major != null) {
|
||||
sb.append(" ");
|
||||
sb.append(operatingSystem.major);
|
||||
|
||||
if (operatingSystem.minor != null) {
|
||||
sb.append(".");
|
||||
sb.append(operatingSystem.minor);
|
||||
|
||||
if (operatingSystem.patch != null) {
|
||||
sb.append(".");
|
||||
sb.append(operatingSystem.patch);
|
||||
|
||||
if (operatingSystem.build != null) {
|
||||
sb.append(".");
|
||||
sb.append(operatingSystem.build);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getType() {
|
||||
return TYPE;
|
||||
}
|
||||
|
||||
String getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
String getTargetField() {
|
||||
return targetField;
|
||||
}
|
||||
|
||||
Set<Property> getProperties() {
|
||||
return properties;
|
||||
}
|
||||
|
||||
UserAgentParser getUaParser() {
|
||||
return parser;
|
||||
}
|
||||
|
||||
public static final class Factory extends AbstractProcessorFactory<UserAgentProcessor> {
|
||||
|
||||
private final Map<String, UserAgentParser> userAgentParsers;
|
||||
|
||||
public Factory(Map<String, UserAgentParser> userAgentParsers) {
|
||||
this.userAgentParsers = userAgentParsers;
|
||||
}
|
||||
|
||||
@Override
|
||||
public UserAgentProcessor doCreate(String processorTag, Map<String, Object> config) throws Exception {
|
||||
String field = readStringProperty(TYPE, processorTag, config, "field");
|
||||
String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "useragent");
|
||||
String regexFilename = readStringProperty(TYPE, processorTag, config, "regex_file", IngestUserAgentPlugin.DEFAULT_PARSER_NAME);
|
||||
List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
|
||||
|
||||
UserAgentParser parser = userAgentParsers.get(regexFilename);
|
||||
if (parser == null) {
|
||||
throw newConfigurationException(TYPE, processorTag,
|
||||
"regex_file", "regex file [" + regexFilename + "] doesn't exist (has to exist at node startup)");
|
||||
}
|
||||
|
||||
final Set<Property> properties;
|
||||
if (propertyNames != null) {
|
||||
properties = EnumSet.noneOf(Property.class);
|
||||
for (String fieldName : propertyNames) {
|
||||
try {
|
||||
properties.add(Property.parseProperty(fieldName));
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw newConfigurationException(TYPE, processorTag, "properties", e.getMessage());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
properties = EnumSet.allOf(Property.class);
|
||||
}
|
||||
|
||||
return new UserAgentProcessor(processorTag, field, targetField, parser, properties);
|
||||
}
|
||||
}
|
||||
|
||||
/** The details the processor can write into the target field. */
enum Property {

    NAME, MAJOR, MINOR, PATCH, OS, OS_NAME, OS_MAJOR, OS_MINOR, DEVICE, BUILD;

    /**
     * Resolves a property from its name, ignoring case.
     *
     * @param propertyName the configured name, e.g. {@code "os_name"}
     * @return the matching constant
     * @throws IllegalArgumentException if no constant has that name; the message lists all valid values
     */
    public static Property parseProperty(String propertyName) {
        // ROOT locale keeps the upper-casing independent of the JVM's default locale
        final String wanted = propertyName.toUpperCase(Locale.ROOT);
        for (Property candidate : values()) {
            if (candidate.name().equals(wanted)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("illegal property value [" + propertyName + "]. valid values are " +
                Arrays.toString(values()));
    }
}
|
||||
}
|
4815
plugins/ingest-useragent/src/main/resources/regexes.yaml
Normal file
4815
plugins/ingest-useragent/src/main/resources/regexes.yaml
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.ingest.useragent;
|
||||
|
||||
import org.elasticsearch.ElasticsearchParseException;
|
||||
import org.elasticsearch.ingest.AbstractProcessorFactory;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.greaterThan;
|
||||
|
||||
public class UserAgentProcessorFactoryTests extends ESTestCase {
|
||||
|
||||
private static Map<String, UserAgentParser> userAgentParsers;
|
||||
|
||||
private static String regexWithoutDevicesFilename = "regexes_without_devices.yaml";
|
||||
private static Path userAgentConfigDir;
|
||||
|
||||
@BeforeClass
|
||||
public static void createUserAgentParsers() throws IOException {
|
||||
Path configDir = createTempDir();
|
||||
userAgentConfigDir = configDir.resolve("ingest-useragent");
|
||||
Files.createDirectories(userAgentConfigDir);
|
||||
|
||||
// Copy file, leaving out the device parsers at the end
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(UserAgentProcessor.class.getResourceAsStream("/regexes.yaml"), StandardCharsets.UTF_8));
|
||||
BufferedWriter writer = Files.newBufferedWriter(userAgentConfigDir.resolve(regexWithoutDevicesFilename));) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
if (line.startsWith("device_parsers:")) {
|
||||
break;
|
||||
}
|
||||
|
||||
writer.write(line);
|
||||
writer.newLine();
|
||||
}
|
||||
}
|
||||
|
||||
userAgentParsers = IngestUserAgentPlugin.createUserAgentParsers(userAgentConfigDir, new UserAgentCache(1000));
|
||||
}
|
||||
|
||||
public void testBuildDefaults() throws Exception {
|
||||
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
|
||||
|
||||
Map<String, Object> config = new HashMap<>();
|
||||
config.put("field", "_field");
|
||||
|
||||
String processorTag = randomAsciiOfLength(10);
|
||||
config.put(AbstractProcessorFactory.TAG_KEY, processorTag);
|
||||
|
||||
UserAgentProcessor processor = factory.create(config);
|
||||
assertThat(processor.getTag(), equalTo(processorTag));
|
||||
assertThat(processor.getField(), equalTo("_field"));
|
||||
assertThat(processor.getTargetField(), equalTo("useragent"));
|
||||
assertThat(processor.getUaParser().getUaPatterns().size(), greaterThan(0));
|
||||
assertThat(processor.getUaParser().getOsPatterns().size(), greaterThan(0));
|
||||
assertThat(processor.getUaParser().getDevicePatterns().size(), greaterThan(0));
|
||||
assertThat(processor.getProperties(), equalTo(EnumSet.allOf(UserAgentProcessor.Property.class)));
|
||||
}
|
||||
|
||||
public void testBuildTargetField() throws Exception {
|
||||
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
|
||||
|
||||
Map<String, Object> config = new HashMap<>();
|
||||
config.put("field", "_field");
|
||||
config.put("target_field", "_target_field");
|
||||
|
||||
UserAgentProcessor processor = factory.create(config);
|
||||
assertThat(processor.getField(), equalTo("_field"));
|
||||
assertThat(processor.getTargetField(), equalTo("_target_field"));
|
||||
}
|
||||
|
||||
public void testBuildRegexFile() throws Exception {
|
||||
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
|
||||
|
||||
Map<String, Object> config = new HashMap<>();
|
||||
config.put("field", "_field");
|
||||
config.put("regex_file", regexWithoutDevicesFilename);
|
||||
|
||||
UserAgentProcessor processor = factory.create(config);
|
||||
assertThat(processor.getField(), equalTo("_field"));
|
||||
assertThat(processor.getUaParser().getUaPatterns().size(), greaterThan(0));
|
||||
assertThat(processor.getUaParser().getOsPatterns().size(), greaterThan(0));
|
||||
assertThat(processor.getUaParser().getDevicePatterns().size(), equalTo(0));
|
||||
}
|
||||
|
||||
public void testBuildNonExistingRegexFile() throws Exception {
|
||||
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
|
||||
|
||||
Map<String, Object> config = new HashMap<>();
|
||||
config.put("field", "_field");
|
||||
config.put("regex_file", "does-not-exist.yaml");
|
||||
|
||||
ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(config));
|
||||
assertThat(e.getMessage(), equalTo("[regex_file] regex file [does-not-exist.yaml] doesn't exist (has to exist at node startup)"));
|
||||
}
|
||||
|
||||
public void testBuildFields() throws Exception {
|
||||
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
|
||||
|
||||
Set<UserAgentProcessor.Property> properties = EnumSet.noneOf(UserAgentProcessor.Property.class);
|
||||
List<String> fieldNames = new ArrayList<>();
|
||||
int numFields = scaledRandomIntBetween(1, UserAgentProcessor.Property.values().length);
|
||||
for (int i = 0; i < numFields; i++) {
|
||||
UserAgentProcessor.Property property = UserAgentProcessor.Property.values()[i];
|
||||
properties.add(property);
|
||||
fieldNames.add(property.name().toLowerCase(Locale.ROOT));
|
||||
}
|
||||
|
||||
Map<String, Object> config = new HashMap<>();
|
||||
config.put("field", "_field");
|
||||
config.put("properties", fieldNames);
|
||||
|
||||
UserAgentProcessor processor = factory.create(config);
|
||||
assertThat(processor.getField(), equalTo("_field"));
|
||||
assertThat(processor.getProperties(), equalTo(properties));
|
||||
}
|
||||
|
||||
public void testInvalidProperty() throws Exception {
|
||||
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
|
||||
|
||||
Map<String, Object> config = new HashMap<>();
|
||||
config.put("field", "_field");
|
||||
config.put("properties", Collections.singletonList("invalid"));
|
||||
|
||||
ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(config));
|
||||
assertThat(e.getMessage(), equalTo("[properties] illegal property value [invalid]. valid values are [NAME, MAJOR, MINOR, "
|
||||
+ "PATCH, OS, OS_NAME, OS_MAJOR, OS_MINOR, DEVICE, BUILD]"));
|
||||
}
|
||||
|
||||
public void testInvalidPropertiesType() throws Exception {
|
||||
UserAgentProcessor.Factory factory = new UserAgentProcessor.Factory(userAgentParsers);
|
||||
|
||||
Map<String, Object> config = new HashMap<>();
|
||||
config.put("field", "_field");
|
||||
config.put("properties", "invalid");
|
||||
|
||||
ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(config));
|
||||
assertThat(e.getMessage(), equalTo("[properties] property isn't a list, but of type [java.lang.String]"));
|
||||
}
|
||||
}
|
@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.ingest.useragent;
|
||||
|
||||
import org.elasticsearch.ingest.RandomDocumentPicks;
|
||||
import org.elasticsearch.ingest.IngestDocument;
|
||||
import org.elasticsearch.ingest.useragent.UserAgentProcessor;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.hamcrest.Matchers.hasKey;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
|
||||
public class UserAgentProcessorTests extends ESTestCase {
|
||||
|
||||
private static UserAgentProcessor processor;
|
||||
|
||||
@BeforeClass
|
||||
public static void setupProcessor() throws IOException {
|
||||
InputStream regexStream = UserAgentProcessor.class.getResourceAsStream("/regexes.yaml");
|
||||
assertNotNull(regexStream);
|
||||
|
||||
UserAgentParser parser = new UserAgentParser(randomAsciiOfLength(10), regexStream, new UserAgentCache(1000));
|
||||
|
||||
processor = new UserAgentProcessor(randomAsciiOfLength(10), "source_field", "target_field", parser,
|
||||
EnumSet.allOf(UserAgentProcessor.Property.class));
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public void testCommonBrowser() throws Exception {
|
||||
Map<String, Object> document = new HashMap<>();
|
||||
document.put("source_field",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36");
|
||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
|
||||
|
||||
processor.execute(ingestDocument);
|
||||
Map<String, Object> data = ingestDocument.getSourceAndMetadata();
|
||||
|
||||
assertThat(data, hasKey("target_field"));
|
||||
Map<String, Object> target = (Map<String, Object>) data.get("target_field");
|
||||
|
||||
assertThat(target.get("name"), is("Chrome"));
|
||||
assertThat(target.get("major"), is("33"));
|
||||
assertThat(target.get("minor"), is("0"));
|
||||
assertThat(target.get("patch"), is("1750"));
|
||||
assertNull(target.get("build"));
|
||||
|
||||
assertThat(target.get("os"), is("Mac OS X 10.9.2"));
|
||||
assertThat(target.get("os_name"), is("Mac OS X"));
|
||||
assertThat(target.get("os_major"), is("10"));
|
||||
assertThat(target.get("os_minor"), is("9"));
|
||||
|
||||
assertThat(target.get("device"), is("Other"));
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public void testUncommonDevice() throws Exception {
|
||||
Map<String, Object> document = new HashMap<>();
|
||||
document.put("source_field",
|
||||
"Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10+ "
|
||||
+ "(KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2");
|
||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
|
||||
|
||||
processor.execute(ingestDocument);
|
||||
Map<String, Object> data = ingestDocument.getSourceAndMetadata();
|
||||
|
||||
assertThat(data, hasKey("target_field"));
|
||||
Map<String, Object> target = (Map<String, Object>) data.get("target_field");
|
||||
|
||||
assertThat(target.get("name"), is("Android"));
|
||||
assertThat(target.get("major"), is("3"));
|
||||
assertThat(target.get("minor"), is("0"));
|
||||
assertNull(target.get("patch"));
|
||||
assertNull(target.get("build"));
|
||||
|
||||
assertThat(target.get("os"), is("Android 3.0"));
|
||||
assertThat(target.get("os_name"), is("Android"));
|
||||
assertThat(target.get("os_major"), is("3"));
|
||||
assertThat(target.get("os_minor"), is("0"));
|
||||
|
||||
assertThat(target.get("device"), is("Motorola Xoom"));
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public void testSpider() throws Exception {
|
||||
Map<String, Object> document = new HashMap<>();
|
||||
document.put("source_field",
|
||||
"Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)");
|
||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
|
||||
|
||||
processor.execute(ingestDocument);
|
||||
Map<String, Object> data = ingestDocument.getSourceAndMetadata();
|
||||
|
||||
assertThat(data, hasKey("target_field"));
|
||||
Map<String, Object> target = (Map<String, Object>) data.get("target_field");
|
||||
|
||||
assertThat(target.get("name"), is("EasouSpider"));
|
||||
assertNull(target.get("major"));
|
||||
assertNull(target.get("minor"));
|
||||
assertNull(target.get("patch"));
|
||||
assertNull(target.get("build"));
|
||||
|
||||
assertThat(target.get("os"), is("Other"));
|
||||
assertThat(target.get("os_name"), is("Other"));
|
||||
assertNull(target.get("os_major"));
|
||||
assertNull(target.get("os_minor"));
|
||||
|
||||
assertThat(target.get("device"), is("Spider"));
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public void testUnknown() throws Exception {
|
||||
Map<String, Object> document = new HashMap<>();
|
||||
document.put("source_field",
|
||||
"Something I made up v42.0.1");
|
||||
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
|
||||
|
||||
processor.execute(ingestDocument);
|
||||
Map<String, Object> data = ingestDocument.getSourceAndMetadata();
|
||||
|
||||
assertThat(data, hasKey("target_field"));
|
||||
Map<String, Object> target = (Map<String, Object>) data.get("target_field");
|
||||
|
||||
assertThat(target.get("name"), is("Other"));
|
||||
assertNull(target.get("major"));
|
||||
assertNull(target.get("minor"));
|
||||
assertNull(target.get("patch"));
|
||||
assertNull(target.get("build"));
|
||||
|
||||
assertThat(target.get("os"), is("Other"));
|
||||
assertThat(target.get("os_name"), is("Other"));
|
||||
assertNull(target.get("os_major"));
|
||||
assertNull(target.get("os_minor"));
|
||||
|
||||
assertThat(target.get("device"), is("Other"));
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.ingest.useragent;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.Name;
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import org.elasticsearch.test.rest.ESRestTestCase;
|
||||
import org.elasticsearch.test.rest.RestTestCandidate;
|
||||
import org.elasticsearch.test.rest.parser.RestTestParseException;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class UserAgentRestIT extends ESRestTestCase {
|
||||
|
||||
public UserAgentRestIT(@Name("yaml") RestTestCandidate testCandidate) {
|
||||
super(testCandidate);
|
||||
}
|
||||
|
||||
@ParametersFactory
|
||||
public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
|
||||
return ESRestTestCase.createParameters(0, 1);
|
||||
}
|
||||
}
|
@ -0,0 +1,11 @@
|
||||
"ingest-useragent plugin installed":
|
||||
- do:
|
||||
cluster.state: {}
|
||||
|
||||
- set: {master_node: master}
|
||||
|
||||
- do:
|
||||
nodes.info: {}
|
||||
|
||||
- match: { nodes.$master.plugins.0.name: ingest-useragent }
|
||||
- match: { nodes.$master.ingest.processors.0.type: useragent }
|
@ -0,0 +1,86 @@
|
||||
---
|
||||
"Test user agent processor with defaults":
|
||||
- do:
|
||||
ingest.put_pipeline:
|
||||
id: "my_pipeline"
|
||||
body: >
|
||||
{
|
||||
"description": "_description",
|
||||
"processors": [
|
||||
{
|
||||
"useragent" : {
|
||||
"field" : "field1"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
- match: { acknowledged: true }
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
pipeline: "my_pipeline"
|
||||
body: {field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36"}
|
||||
|
||||
- do:
|
||||
get:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
- match: { _source.field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36" }
|
||||
- match: { _source.useragent.name: "Chrome" }
|
||||
- match: { _source.useragent.os: "Mac OS X 10.9.2" }
|
||||
- match: { _source.useragent.os_name: "Mac OS X" }
|
||||
- match: { _source.useragent.os_major: "10" }
|
||||
- match: { _source.useragent.os_minor: "9" }
|
||||
- match: { _source.useragent.major: "33" }
|
||||
- match: { _source.useragent.minor: "0" }
|
||||
- match: { _source.useragent.patch: "1750" }
|
||||
- match: { _source.useragent.device: "Other" }
|
||||
|
||||
---
|
||||
"Test user agent processor with parameters":
|
||||
- do:
|
||||
ingest.put_pipeline:
|
||||
id: "my_pipeline"
|
||||
body: >
|
||||
{
|
||||
"description": "_description",
|
||||
"processors": [
|
||||
{
|
||||
"useragent" : {
|
||||
"field" : "field1",
|
||||
"target_field": "field2",
|
||||
"properties": ["os"]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
- match: { acknowledged: true }
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
pipeline: "my_pipeline"
|
||||
body: {field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36"}
|
||||
|
||||
- do:
|
||||
get:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
- match: { _source.field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36" }
|
||||
- match: { _source.field2.os: "Mac OS X 10.9.2" }
|
||||
- is_false: _source.useragent
|
||||
- is_false: _source.field2.name
|
||||
- is_false: _source.field2.os_name
|
||||
- is_false: _source.field2.os_major
|
||||
- is_false: _source.field2.os_minor
|
||||
- is_false: _source.field2.major
|
||||
- is_false: _source.field2.minor
|
||||
- is_false: _source.field2.patch
|
||||
- is_false: _source.field2.device
|
@ -0,0 +1,42 @@
|
||||
---
|
||||
"Test user agent processor with custom regex file":
|
||||
- do:
|
||||
ingest.put_pipeline:
|
||||
id: "my_pipeline"
|
||||
body: >
|
||||
{
|
||||
"description": "_description",
|
||||
"processors": [
|
||||
{
|
||||
"useragent" : {
|
||||
"field": "field1",
|
||||
"regex_file": "test-regexes.yaml"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
- match: { acknowledged: true }
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
pipeline: "my_pipeline"
|
||||
body: {field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36"}
|
||||
|
||||
- do:
|
||||
get:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
- match: { _source.field1: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36" }
|
||||
- match: { _source.useragent.name: "Test" }
|
||||
- match: { _source.useragent.os: "Other" }
|
||||
- match: { _source.useragent.os_name: "Other" }
|
||||
- match: { _source.useragent.device: "Other" }
|
||||
- is_false: _source.useragent.os_major
|
||||
- is_false: _source.useragent.os_minor
|
||||
- is_false: _source.useragent.major
|
||||
- is_false: _source.useragent.minor
|
||||
- is_false: _source.useragent.patch
|
3
plugins/ingest-useragent/test/test-regexes.yaml
Normal file
3
plugins/ingest-useragent/test/test-regexes.yaml
Normal file
@ -0,0 +1,3 @@
|
||||
user_agent_parsers:
|
||||
- regex: '.*'
|
||||
family_replacement: 'Test'
|
@ -36,6 +36,7 @@ List projects = [
|
||||
'plugins:discovery-gce',
|
||||
'plugins:ingest-geoip',
|
||||
'plugins:ingest-attachment',
|
||||
'plugins:ingest-useragent',
|
||||
'plugins:lang-javascript',
|
||||
'plugins:lang-python',
|
||||
'plugins:mapper-attachments',
|
||||
|
Loading…
x
Reference in New Issue
Block a user