Package ingest-user-agent as a module (#36956)

This commit moves ingest-user-agent from being a plugin to being a
module that is packaged with Elasticsearch distributions.
This commit is contained in:
Jason Tedor 2018-12-22 20:20:53 -05:00 committed by GitHub
parent d238b2934c
commit 1f574bd17a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 168 additions and 150 deletions

View File

@ -12,7 +12,6 @@ configurations {
} }
dependencies { dependencies {
dockerPlugins project(path: ":plugins:ingest-user-agent", configuration: 'zip')
dockerSource project(path: ":distribution:archives:tar") dockerSource project(path: ":distribution:archives:tar")
ossDockerSource project(path: ":distribution:archives:oss-tar") ossDockerSource project(path: ":distribution:archives:oss-tar")
} }
@ -23,7 +22,6 @@ ext.expansions = { oss ->
'jdkUrl' : 'https://download.java.net/java/GA/jdk11/13/GPL/openjdk-11.0.1_linux-x64_bin.tar.gz', 'jdkUrl' : 'https://download.java.net/java/GA/jdk11/13/GPL/openjdk-11.0.1_linux-x64_bin.tar.gz',
'jdkVersion' : '11.0.1', 'jdkVersion' : '11.0.1',
'license': oss ? 'Apache-2.0' : 'Elastic License', 'license': oss ? 'Apache-2.0' : 'Elastic License',
'ingest-user-agent' : "ingest-user-agent-${VersionProperties.elasticsearch}.zip",
'version' : VersionProperties.elasticsearch 'version' : VersionProperties.elasticsearch
] ]
} }

View File

@ -30,9 +30,8 @@ RUN groupadd -g 1000 elasticsearch && \
WORKDIR /usr/share/elasticsearch WORKDIR /usr/share/elasticsearch
COPY ${elasticsearch} ${ingest-user-agent} /opt/ COPY ${elasticsearch} /opt/
RUN tar zxf /opt/${elasticsearch} --strip-components=1 RUN tar zxf /opt/${elasticsearch} --strip-components=1
RUN elasticsearch-plugin install --batch file:///opt/${ingest-user-agent}
RUN mkdir -p config data logs RUN mkdir -p config data logs
RUN chmod 0775 config data logs RUN chmod 0775 config data logs
COPY config/elasticsearch.yml config/log4j2.properties config/ COPY config/elasticsearch.yml config/log4j2.properties config/

View File

@ -222,8 +222,10 @@ class InstallPluginCommand extends EnvironmentAwareCommand {
throw new UserException(ExitCodes.USAGE, "plugin id is required"); throw new UserException(ExitCodes.USAGE, "plugin id is required");
} }
if ("ingest-geoip".equals(pluginId)) { if ("ingest-geoip".equals(pluginId) || "ingest-user-agent".equals(pluginId)) {
handleInstallIngestGeoIp(); throw new UserException(
ExitCodes.OK,
"[" + pluginId + "] is no longer a plugin but instead a module packaged with this distribution of Elasticsearch");
} }
if ("x-pack".equals(pluginId)) { if ("x-pack".equals(pluginId)) {
@ -235,12 +237,6 @@ class InstallPluginCommand extends EnvironmentAwareCommand {
install(terminal, isBatch, extractedZip, env); install(terminal, isBatch, extractedZip, env);
} }
private static void handleInstallIngestGeoIp() throws UserException {
throw new UserException(
ExitCodes.OK,
"ingest-geoip is no longer a plugin but instead a module packaged with this distribution of Elasticsearch");
}
Build.Flavor buildFlavor() { Build.Flavor buildFlavor() {
return Build.CURRENT.flavor(); return Build.CURRENT.flavor();
} }

View File

@ -21,11 +21,11 @@ package org.elasticsearch.plugins;
import joptsimple.OptionSet; import joptsimple.OptionSet;
import joptsimple.OptionSpec; import joptsimple.OptionSpec;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.cli.EnvironmentAwareCommand; import org.elasticsearch.cli.EnvironmentAwareCommand;
import org.elasticsearch.cli.ExitCodes; import org.elasticsearch.cli.ExitCodes;
import org.elasticsearch.cli.Terminal; import org.elasticsearch.cli.Terminal;
import org.elasticsearch.cli.UserException; import org.elasticsearch.cli.UserException;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import java.io.IOException; import java.io.IOException;
@ -112,11 +112,14 @@ class RemovePluginCommand extends EnvironmentAwareCommand {
if ((!Files.exists(pluginDir) && !Files.exists(pluginConfigDir) && !Files.exists(removing)) if ((!Files.exists(pluginDir) && !Files.exists(pluginConfigDir) && !Files.exists(removing))
|| (!Files.exists(pluginDir) && Files.exists(pluginConfigDir) && !purge)) { || (!Files.exists(pluginDir) && Files.exists(pluginConfigDir) && !purge)) {
// special case for ingest-geoip since it is a module now but could have been installed from a previous when it was a plugin /*
if ("ingest-geoip".equals(pluginName)) { * This is special case handling for ingest-geoip and ingest-user-agent since they are modules now but could have been installed
* from a previous version when they were plugins.
*/
if ("ingest-geoip".equals(pluginName) || "ingest-user-agent".equals(pluginName)) {
throw new UserException( throw new UserException(
ExitCodes.OK, ExitCodes.OK,
"ingest-geoip is no longer a plugin but instead a module packaged with this distribution of Elasticsearch"); "[" + pluginName + "] is no longer a plugin but instead a module packaged with this distribution of Elasticsearch");
} }
final String message = String.format( final String message = String.format(

View File

@ -757,15 +757,25 @@ public class InstallPluginCommandTests extends ESTestCase {
} }
} }
public void testInstallGeoIp() throws IOException { public void testInstallIngestGeoIp() throws IOException {
runInstallIngestGeoIpOrIngestUserAgentTest("ingest-geoip");
}
public void testInstallIngestUserAgent() throws IOException {
runInstallIngestGeoIpOrIngestUserAgentTest("ingest-user-agent");
}
private void runInstallIngestGeoIpOrIngestUserAgentTest(final String pluginId) throws IOException {
assert "ingest-geoip".equals(pluginId) || "ingest-user-agent".equals(pluginId) : pluginId;
final Environment environment = createEnv(fs, temp).v2(); final Environment environment = createEnv(fs, temp).v2();
final UserException exception = final UserException exception =
expectThrows(UserException.class, () -> new InstallPluginCommand().execute(terminal, "ingest-geoip", false, environment)); expectThrows(UserException.class, () -> new InstallPluginCommand().execute(terminal, pluginId, false, environment));
assertThat(exception.exitCode, equalTo(ExitCodes.OK)); assertThat(exception.exitCode, equalTo(ExitCodes.OK));
assertThat( assertThat(
exception, exception,
hasToString(containsString( hasToString(containsString(
"ingest-geoip is no longer a plugin but instead a module packaged with this distribution of Elasticsearch"))); "[" + pluginId + "] is no longer a plugin but instead a module packaged with this distribution of Elasticsearch")));
} }
public void testInstallXPack() throws IOException { public void testInstallXPack() throws IOException {

View File

@ -259,24 +259,48 @@ public class RemovePluginCommandTests extends ESTestCase {
* @throws Exception if an exception is thrown creating or removing the plugin * @throws Exception if an exception is thrown creating or removing the plugin
*/ */
public void testRemoveIngestGeoIp() throws Exception { public void testRemoveIngestGeoIp() throws Exception {
runTestRemoveIngestGeoIpOrIngestUserAgent("ingest-geoip");
}
/**
* The ingest-user-agent plugin receives special handling because we have re-packaged it as a module; this test ensures that we are
* still able to uninstall an old installation of ingest-user-agent.
*
* @throws Exception if an exception is thrown creating or removing the plugin
*/
public void testRemoveIngestUserAgent() throws Exception {
runTestRemoveIngestGeoIpOrIngestUserAgent("ingest-user-agent");
}
private void runTestRemoveIngestGeoIpOrIngestUserAgent(final String name) throws Exception {
assert "ingest-geoip".equals(name) || "ingest-user-agent".equals(name) : name;
createPlugin( createPlugin(
"ingest-geoip", name,
VersionUtils.randomVersionBetween( VersionUtils.randomVersionBetween(
random(), random(),
Version.CURRENT.minimumIndexCompatibilityVersion(), Version.CURRENT.minimumIndexCompatibilityVersion(),
Version.V_6_6_0)); Version.V_6_6_0));
removePlugin("ingest-geoip", home, randomBoolean()); removePlugin(name, home, randomBoolean());
assertThat(Files.exists(env.pluginsFile().resolve("ingest-geoip")), equalTo(false)); assertThat(Files.exists(env.pluginsFile().resolve(name)), equalTo(false));
assertRemoveCleaned(env); assertRemoveCleaned(env);
} }
public void testRemoveIngestGeoIpWhenNotInstalled() { public void testRemoveIngestGeoIpWhenNotInstalled() {
final UserException e = expectThrows(UserException.class, () -> removePlugin("ingest-geoip", home, randomBoolean())); runTestRemoveIngestGeoIpOrIngestUserAgentWhenNotInstalled("ingest-geoip");
}
public void testRemoveIngestUserAgentWhenNotInstalled() {
runTestRemoveIngestGeoIpOrIngestUserAgentWhenNotInstalled("ingest-user-agent");
}
private void runTestRemoveIngestGeoIpOrIngestUserAgentWhenNotInstalled(final String name) {
assert "ingest-geoip".equals(name) || "ingest-user-agent".equals(name) : name;
final UserException e = expectThrows(UserException.class, () -> removePlugin(name, home, randomBoolean()));
assertThat(e.exitCode, equalTo(ExitCodes.OK)); assertThat(e.exitCode, equalTo(ExitCodes.OK));
assertThat( assertThat(
e, e,
hasToString(Matchers.containsString( hasToString(Matchers.containsString(
"ingest-geoip is no longer a plugin but instead a module packaged with this distribution of Elasticsearch"))); "[" + name + "] is no longer a plugin but instead a module packaged with this distribution of Elasticsearch")));
} }
public void testRemoveWhenRemovingMarker() throws Exception { public void testRemoveWhenRemovingMarker() throws Exception {

View File

@ -1,88 +1,7 @@
[[ingest-user-agent]] [[ingest-user-agent]]
=== Ingest user agent processor plugin === Ingest `user_agent` Processor Plugin
The `user_agent` processor extracts details from the user agent string a browser sends with its web requests. The `user_agent` processor is no longer distributed as a plugin, but is now a module
This processor adds this information by default under the `user_agent` field. distributed by default with Elasticsearch. See
{ref}/ingest-user-agent.html[Ingest `user_agent` processor] for more details.
The ingest-user-agent plugin ships by default with the regexes.yaml made available by uap-java with an Apache 2.0 license. For more details see https://github.com/ua-parser/uap-core.
:plugin_name: ingest-user-agent
include::install_remove.asciidoc[]
[[using-ingest-user-agent]]
==== Using the user_agent Processor in a Pipeline
[[ingest-user-agent-options]]
.User-agent options
[options="header"]
|======
| Name | Required | Default | Description
| `field` | yes | - | The field containing the user agent string.
| `target_field` | no | user_agent | The field that will be filled with the user agent details.
| `regex_file` | no | - | The name of the file in the `config/ingest-user-agent` directory containing the regular expressions for parsing the user agent string. Both the directory and the file have to be created before starting Elasticsearch. If not specified, ingest-user-agent will use the regexes.yaml from uap-core it ships with (see below).
| `properties` | no | [`name`, `major`, `minor`, `patch`, `build`, `os`, `os_name`, `os_major`, `os_minor`, `device`] | Controls what properties are added to `target_field`.
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
|======
Here is an example that adds the user agent details to the `user_agent` field based on the `agent` field:
[source,js]
--------------------------------------------------
PUT _ingest/pipeline/user_agent
{
"description" : "Add user agent information",
"processors" : [
{
"user_agent" : {
"field" : "agent"
}
}
]
}
PUT my_index/_doc/my_id?pipeline=user_agent
{
"agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
}
GET my_index/_doc/my_id
--------------------------------------------------
// CONSOLE
Which returns
[source,js]
--------------------------------------------------
{
"found": true,
"_index": "my_index",
"_type": "_doc",
"_id": "my_id",
"_version": 1,
"_seq_no": 22,
"_primary_term": 1,
"_source": {
"agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
"user_agent": {
"name": "Chrome",
"major": "51",
"minor": "0",
"patch": "2704",
"os_name": "Mac OS X",
"os": "Mac OS X 10.10.5",
"os_major": "10",
"os_minor": "10",
"device": "Other"
}
}
}
--------------------------------------------------
// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term": 1/"_primary_term" : $body._primary_term/]
===== Using a custom regex file
To use a custom regex file for parsing the user agents, that file has to be put into the `config/ingest-user-agent` directory and
has to have a `.yaml` filename extension. The file has to be present at node startup, any changes to it or any new files added
while the node is running will not have any effect.
In practice, it will make most sense for any custom regex file to be a variant of the default file, either a more recent version
or a customised version.
The default file included in `ingest-user-agent` is the `regexes.yaml` from uap-core: https://github.com/ua-parser/uap-core/blob/master/regexes.yaml

View File

@ -24,7 +24,10 @@ details.
<<ingest-user-agent>>:: <<ingest-user-agent>>::
A processor that extracts details from the User-Agent header value. A processor that extracts details from the User-Agent header value. The
`user_agent` processor is no longer distributed as a plugin, but is now a module
distributed by default with Elasticsearch. See
{ref}/ingest-user-agent.html[Ingest `user_agent` processor] for more details.
[float] [float]
=== Community contributed ingest plugins === Community contributed ingest plugins

View File

@ -25,7 +25,6 @@ U7321H6 discovery-azure-classic {version_qualified} The Azure Classic Discovery
U7321H6 discovery-ec2 {version_qualified} The EC2 discovery plugin allows to use AWS API for the unicast discovery mechanism. U7321H6 discovery-ec2 {version_qualified} The EC2 discovery plugin allows to use AWS API for the unicast discovery mechanism.
U7321H6 discovery-gce {version_qualified} The Google Compute Engine (GCE) Discovery plugin allows to use GCE API for the unicast discovery mechanism. U7321H6 discovery-gce {version_qualified} The Google Compute Engine (GCE) Discovery plugin allows to use GCE API for the unicast discovery mechanism.
U7321H6 ingest-attachment {version_qualified} Ingest processor that uses Apache Tika to extract contents U7321H6 ingest-attachment {version_qualified} Ingest processor that uses Apache Tika to extract contents
U7321H6 ingest-user-agent {version_qualified} Ingest processor that extracts information from a user agent
U7321H6 mapper-annotated-text {version_qualified} The Mapper Annotated_text plugin adds support for text fields with markup used to inject annotation tokens into the index. U7321H6 mapper-annotated-text {version_qualified} The Mapper Annotated_text plugin adds support for text fields with markup used to inject annotation tokens into the index.
U7321H6 mapper-murmur3 {version_qualified} The Mapper Murmur3 plugin allows to compute hashes of a field's values at index-time and to store them in the index. U7321H6 mapper-murmur3 {version_qualified} The Mapper Murmur3 plugin allows to compute hashes of a field's values at index-time and to store them in the index.
U7321H6 mapper-size {version_qualified} The Mapper Size plugin allows document to record their uncompressed size at index time. U7321H6 mapper-size {version_qualified} The Mapper Size plugin allows document to record their uncompressed size at index time.

View File

@ -158,13 +158,6 @@ The result will look similar to:
"description": "The ICU Analysis plugin integrates Lucene ICU module into elasticsearch, adding ICU relates analysis components.", "description": "The ICU Analysis plugin integrates Lucene ICU module into elasticsearch, adding ICU relates analysis components.",
"classname": "org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin", "classname": "org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin",
"has_native_controller": false "has_native_controller": false
},
{
"name": "ingest-user-agent",
"version": "{version}",
"description": "Ingest processor that extracts information from a user agent",
"classname": "org.elasticsearch.ingest.useragent.IngestUserAgentPlugin",
"has_native_controller": false
} }
], ],
"modules": [ "modules": [

View File

@ -186,13 +186,6 @@ Will return, for example:
"classname": "org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin", "classname": "org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin",
"has_native_controller": false "has_native_controller": false
}, },
{
"name": "ingest-user-agent",
"version": "{version}",
"description": "Ingest processor that extracts information from a user agent",
"classname": "org.elasticsearch.ingest.useragent.IngestUserAgentPlugin",
"has_native_controller": false
},
... ...
], ],
"network_types": { "network_types": {

View File

@ -1351,3 +1351,4 @@ include::processors/sort.asciidoc[]
include::processors/trim.asciidoc[] include::processors/trim.asciidoc[]
include::processors/uppercase.asciidoc[] include::processors/uppercase.asciidoc[]
include::processors/url-decode.asciidoc[] include::processors/url-decode.asciidoc[]
include::processors/user-agent.asciidoc[]

View File

@ -0,0 +1,85 @@
[[ingest-user-agent]]
=== Ingest user agent processor
The `user_agent` processor extracts details from the user agent string a browser sends with its web requests.
This processor adds this information by default under the `user_agent` field.
The ingest-user-agent module ships by default with the regexes.yaml made available by uap-java with an Apache 2.0 license. For more details see https://github.com/ua-parser/uap-core.
[[using-ingest-user-agent]]
==== Using the user_agent Processor in a Pipeline
[[ingest-user-agent-options]]
.User-agent options
[options="header"]
|======
| Name | Required | Default | Description
| `field` | yes | - | The field containing the user agent string.
| `target_field` | no | user_agent | The field that will be filled with the user agent details.
| `regex_file` | no | - | The name of the file in the `config/ingest-user-agent` directory containing the regular expressions for parsing the user agent string. Both the directory and the file have to be created before starting Elasticsearch. If not specified, ingest-user-agent will use the regexes.yaml from uap-core it ships with (see below).
| `properties` | no | [`name`, `major`, `minor`, `patch`, `build`, `os`, `os_name`, `os_major`, `os_minor`, `device`] | Controls what properties are added to `target_field`.
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document.
|======
Here is an example that adds the user agent details to the `user_agent` field based on the `agent` field:
[source,js]
--------------------------------------------------
PUT _ingest/pipeline/user_agent
{
"description" : "Add user agent information",
"processors" : [
{
"user_agent" : {
"field" : "agent"
}
}
]
}
PUT my_index/_doc/my_id?pipeline=user_agent
{
"agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
}
GET my_index/_doc/my_id
--------------------------------------------------
// CONSOLE
Which returns
[source,js]
--------------------------------------------------
{
"found": true,
"_index": "my_index",
"_type": "_doc",
"_id": "my_id",
"_version": 1,
"_seq_no": 22,
"_primary_term": 1,
"_source": {
"agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
"user_agent": {
"name": "Chrome",
"major": "51",
"minor": "0",
"patch": "2704",
"os_name": "Mac OS X",
"os": "Mac OS X 10.10.5",
"os_major": "10",
"os_minor": "10",
"device": "Other"
}
}
}
--------------------------------------------------
// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term": 1/"_primary_term" : $body._primary_term/]
===== Using a custom regex file
To use a custom regex file for parsing the user agents, that file has to be put into the `config/ingest-user-agent` directory and
has to have a `.yaml` filename extension. The file has to be present at node startup; any changes to it, or any new files added
while the node is running, will not have any effect.
In practice, it will make most sense for any custom regex file to be a variant of the default file, either a more recent version
or a customised version.
The default file included in `ingest-user-agent` is the `regexes.yaml` from uap-core: https://github.com/ua-parser/uap-core/blob/master/regexes.yaml

View File

@ -23,5 +23,5 @@ esplugin {
} }
integTestCluster { integTestCluster {
extraConfigFile 'ingest-user-agent/test-regexes.yml', 'test/test-regexes.yml' extraConfigFile 'ingest-user-agent/test-regexes.yml', 'src/test/test-regexes.yml'
} }

View File

@ -198,4 +198,3 @@ public class UserAgentProcessorTests extends ESTestCase {
assertThat(target.get("device"), is("Other")); assertThat(target.get("device"), is("Other"));
} }
} }

View File

@ -1,4 +1,4 @@
"ingest-user-agent plugin installed": "ingest-user-agent installed":
- skip: - skip:
reason: "contains is a newly added assertion" reason: "contains is a newly added assertion"
features: contains features: contains
@ -10,5 +10,5 @@
- do: - do:
nodes.info: {} nodes.info: {}
- contains: { nodes.$master.plugins: { name: ingest-user-agent } } - contains: { nodes.$master.modules: { name: ingest-user-agent } }
- contains: { nodes.$master.ingest.processors: { type: user_agent } } - contains: { nodes.$master.ingest.processors: { type: user_agent } }

View File

@ -231,10 +231,6 @@ fi
install_and_check_plugin ingest attachment bcprov-jdk15on-*.jar tika-core-*.jar pdfbox-*.jar poi-4.0.0.jar poi-ooxml-4.0.0.jar poi-ooxml-schemas-*.jar poi-scratchpad-*.jar install_and_check_plugin ingest attachment bcprov-jdk15on-*.jar tika-core-*.jar pdfbox-*.jar poi-4.0.0.jar poi-ooxml-4.0.0.jar poi-ooxml-schemas-*.jar poi-scratchpad-*.jar
} }
@test "[$GROUP] install ingest-user-agent plugin" {
install_and_check_plugin ingest user-agent
}
@test "[$GROUP] check ingest-common module" { @test "[$GROUP] check ingest-common module" {
check_module ingest-common jcodings-*.jar joni-*.jar check_module ingest-common jcodings-*.jar joni-*.jar
} }
@ -243,6 +239,10 @@ fi
check_module ingest-geoip geoip2-*.jar jackson-annotations-*.jar jackson-databind-*.jar maxmind-db-*.jar check_module ingest-geoip geoip2-*.jar jackson-annotations-*.jar jackson-databind-*.jar maxmind-db-*.jar
} }
@test "[$GROUP] check ingest-user-agent module" {
check_module ingest-user-agent
}
@test "[$GROUP] check lang-expression module" { @test "[$GROUP] check lang-expression module" {
# we specify the version on the asm-5.0.4.jar so that the test does # we specify the version on the asm-5.0.4.jar so that the test does
# not spuriously pass if the jar is missing but the other asm jars # not spuriously pass if the jar is missing but the other asm jars
@ -364,10 +364,6 @@ fi
remove_plugin ingest-attachment remove_plugin ingest-attachment
} }
@test "[$GROUP] remove ingest-user-agent plugin" {
remove_plugin ingest-user-agent
}
@test "[$GROUP] remove murmur3 mapper plugin" { @test "[$GROUP] remove murmur3 mapper plugin" {
remove_plugin mapper-murmur3 remove_plugin mapper-murmur3
} }