ingest: Upgrade geoip processor's dependencies and database files

The new database files are twice the size of the files used previously.
For this reason the database files are now shipped gzip compressed, which requires
wrapping the file stream in a `GZIPInputStream` when loading them.
This commit is contained in:
Martijn van Groningen 2016-06-08 16:18:42 +02:00
parent 5161afe5e3
commit 3dd3ed4905
12 changed files with 44 additions and 37 deletions

View File

@ -8,8 +8,8 @@ The ingest-geoip plugin ships by default with the GeoLite2 City and GeoLite2 Cou
under the CCA-ShareAlike 3.0 license. For more details, see http://dev.maxmind.com/geoip/geoip2/geolite2/
The GeoIP processor can run with other geoip2 databases from Maxmind. The files must be copied into the geoip config directory,
and the `database_file` option should be used to specify the filename of the custom database. The geoip config directory
is located at `$ES_HOME/config/ingest/geoip` and holds the shipped databases too.
and the `database_file` option should be used to specify the filename of the custom database. Custom database files must be compressed
with gzip. The geoip config directory is located at `$ES_HOME/config/ingest/geoip` and holds the shipped databases too.
[[geoip-options]]
.Geoip options
@ -18,7 +18,7 @@ is located at `$ES_HOME/config/ingest/geoip` and holds the shipped databases too
| Name | Required | Default | Description
| `field` | yes | - | The field to get the ip address from for the geographical lookup.
| `target_field` | no | geoip | The field that will hold the geographical information looked up from the Maxmind database.
| `database_file` | no | GeoLite2-City.mmdb | The database filename in the geoip config directory. The ingest-geoip plugin ships with the GeoLite2-City.mmdb and GeoLite2-Country.mmdb files.
| `database_file` | no | GeoLite2-City.mmdb.gz | The database filename in the geoip config directory. The ingest-geoip plugin ships with the GeoLite2-City.mmdb.gz and GeoLite2-Country.mmdb.gz files.
| `properties` | no | [`continent_name`, `country_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the geoip lookup.
|======

View File

@ -23,19 +23,19 @@ esplugin {
}
dependencies {
compile ('com.maxmind.geoip2:geoip2:2.6.0')
compile ('com.maxmind.geoip2:geoip2:2.7.0')
// geoip2 dependencies:
compile('com.fasterxml.jackson.core:jackson-annotations:2.7.1')
compile('com.fasterxml.jackson.core:jackson-databind:2.7.1')
compile('com.maxmind.db:maxmind-db:1.2.0')
compile('com.maxmind.db:maxmind-db:1.2.1')
testCompile 'org.elasticsearch:geolite2-databases:20151029'
testCompile 'org.elasticsearch:geolite2-databases:20160608'
}
task copyDefaultGeoIp2DatabaseFiles(type: Copy) {
from { zipTree(configurations.testCompile.files.find { it.name.contains('geolite2-databases')}) }
into "${project.buildDir}/ingest-geoip"
include "*.mmdb"
include "*.mmdb.gz"
}
project.bundlePlugin.dependsOn(copyDefaultGeoIp2DatabaseFiles)

View File

@ -1 +0,0 @@
2574c8b878f1cd39709559f1b96f1b5f0cdd69d3

View File

@ -0,0 +1 @@
2010d922191f5801939b462a5703ab79a7829626

View File

@ -1 +0,0 @@
b842823f24555f5d26608fef8122898365b3cd63

View File

@ -0,0 +1 @@
64b6b6a8c162fc9b0004fcdf9641cf1b408ffa33

View File

@ -233,7 +233,7 @@ public final class GeoIpProcessor extends AbstractProcessor {
public GeoIpProcessor doCreate(String processorTag, Map<String, Object> config) throws Exception {
String ipField = readStringProperty(TYPE, processorTag, config, "field");
String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "geoip");
String databaseFile = readStringProperty(TYPE, processorTag, config, "database_file", "GeoLite2-City.mmdb");
String databaseFile = readStringProperty(TYPE, processorTag, config, "database_file", "GeoLite2-City.mmdb.gz");
List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
DatabaseReader databaseReader = databaseReaders.get(databaseFile);

View File

@ -34,6 +34,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
public class IngestGeoIpPlugin extends Plugin {
@ -60,13 +61,13 @@ public class IngestGeoIpPlugin extends Plugin {
Map<String, DatabaseReader> databaseReaders = new HashMap<>();
try (Stream<Path> databaseFiles = Files.list(geoIpConfigDirectory)) {
PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb");
PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb.gz");
// Use iterator instead of forEach otherwise IOException needs to be caught twice...
Iterator<Path> iterator = databaseFiles.iterator();
while (iterator.hasNext()) {
Path databasePath = iterator.next();
if (Files.isRegularFile(databasePath) && pathMatcher.matches(databasePath)) {
try (InputStream inputStream = Files.newInputStream(databasePath, StandardOpenOption.READ)) {
try (InputStream inputStream = new GZIPInputStream(Files.newInputStream(databasePath, StandardOpenOption.READ))) {
databaseReaders.put(databasePath.getFileName().toString(), new DatabaseReader.Builder(inputStream).build());
}
}

View File

@ -54,8 +54,8 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
Path configDir = createTempDir();
Path geoIpConfigDir = configDir.resolve("ingest-geoip");
Files.createDirectories(geoIpConfigDir);
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")), geoIpConfigDir.resolve("GeoLite2-City.mmdb"));
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb")), geoIpConfigDir.resolve("GeoLite2-Country.mmdb"));
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb.gz")), geoIpConfigDir.resolve("GeoLite2-City.mmdb.gz"));
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb.gz")), geoIpConfigDir.resolve("GeoLite2-Country.mmdb.gz"));
databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir);
}
@ -89,7 +89,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("database_file", "GeoLite2-Country.mmdb");
config.put("database_file", "GeoLite2-Country.mmdb.gz");
String processorTag = randomAsciiOfLength(10);
config.put(AbstractProcessorFactory.TAG_KEY, processorTag);
@ -116,7 +116,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders);
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("database_file", "GeoLite2-Country.mmdb");
config.put("database_file", "GeoLite2-Country.mmdb.gz");
GeoIpProcessor processor = factory.create(config);
assertThat(processor.getField(), equalTo("_field"));
assertThat(processor.getTargetField(), equalTo("geoip"));
@ -128,7 +128,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders);
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("database_file", "GeoLite2-Country.mmdb");
config.put("database_file", "GeoLite2-Country.mmdb.gz");
EnumSet<GeoIpProcessor.Property> cityOnlyProperties = EnumSet.complementOf(GeoIpProcessor.Property.ALL_COUNTRY_PROPERTIES);
String cityProperty = RandomPicks.randomFrom(Randomness.get(), cityOnlyProperties).toString();
config.put("properties", Collections.singletonList(cityProperty));
@ -145,12 +145,12 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("database_file", "does-not-exist.mmdb");
config.put("database_file", "does-not-exist.mmdb.gz");
try {
factory.create(config);
fail("Exception expected");
} catch (ElasticsearchParseException e) {
assertThat(e.getMessage(), equalTo("[database_file] database file [does-not-exist.mmdb] doesn't exist"));
assertThat(e.getMessage(), equalTo("[database_file] database file [does-not-exist.mmdb.gz] doesn't exist"));
}
}

View File

@ -24,10 +24,12 @@ import org.elasticsearch.ingest.RandomDocumentPicks;
import org.elasticsearch.ingest.core.IngestDocument;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.io.InputStream;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
@ -35,33 +37,33 @@ import static org.hamcrest.Matchers.equalTo;
public class GeoIpProcessorTests extends ESTestCase {
public void testCity() throws Exception {
InputStream database = GeoIpProcessor.class.getResourceAsStream("/GeoLite2-City.mmdb");
InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field", new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
Map<String, Object> document = new HashMap<>();
document.put("source_field", "82.170.213.79");
document.put("source_field", "8.8.8.8");
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
processor.execute(ingestDocument);
assertThat(ingestDocument.getSourceAndMetadata().get("source_field"), equalTo("82.170.213.79"));
assertThat(ingestDocument.getSourceAndMetadata().get("source_field"), equalTo("8.8.8.8"));
@SuppressWarnings("unchecked")
Map<String, Object> geoData = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("target_field");
assertThat(geoData.size(), equalTo(8));
assertThat(geoData.get("ip"), equalTo("82.170.213.79"));
assertThat(geoData.get("country_iso_code"), equalTo("NL"));
assertThat(geoData.get("country_name"), equalTo("Netherlands"));
assertThat(geoData.get("continent_name"), equalTo("Europe"));
assertThat(geoData.get("region_name"), equalTo("North Holland"));
assertThat(geoData.get("city_name"), equalTo("Amsterdam"));
assertThat(geoData.get("timezone"), equalTo("Europe/Amsterdam"));
assertThat(geoData.get("ip"), equalTo("8.8.8.8"));
assertThat(geoData.get("country_iso_code"), equalTo("US"));
assertThat(geoData.get("country_name"), equalTo("United States"));
assertThat(geoData.get("continent_name"), equalTo("North America"));
assertThat(geoData.get("region_name"), equalTo("California"));
assertThat(geoData.get("city_name"), equalTo("Mountain View"));
assertThat(geoData.get("timezone"), equalTo("America/Los_Angeles"));
Map<String, Object> location = new HashMap<>();
location.put("lat", 52.374d);
location.put("lon", 4.8897d);
location.put("lat", 37.386d);
location.put("lon", -122.0838d);
assertThat(geoData.get("location"), equalTo(location));
}
public void testCountry() throws Exception {
InputStream database = GeoIpProcessor.class.getResourceAsStream("/GeoLite2-Country.mmdb");
InputStream database = getDatabaseFileInputStream("/GeoLite2-Country.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field", new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
Map<String, Object> document = new HashMap<>();
@ -80,11 +82,11 @@ public class GeoIpProcessorTests extends ESTestCase {
}
public void testAddressIsNotInTheDatabase() throws Exception {
InputStream database = GeoIpProcessor.class.getResourceAsStream("/GeoLite2-City.mmdb");
InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field", new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
Map<String, Object> document = new HashMap<>();
document.put("source_field", "202.45.11.11");
document.put("source_field", "127.0.0.1");
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document);
processor.execute(ingestDocument);
@SuppressWarnings("unchecked")
@ -94,7 +96,7 @@ public class GeoIpProcessorTests extends ESTestCase {
/** Don't silently do DNS lookups or anything trappy on bogus data */
public void testInvalid() throws Exception {
InputStream database = GeoIpProcessor.class.getResourceAsStream("/GeoLite2-City.mmdb");
InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz");
GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field", new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class));
Map<String, Object> document = new HashMap<>();
@ -109,4 +111,8 @@ public class GeoIpProcessorTests extends ESTestCase {
}
}
/**
 * Opens a gzip-compressed database file from the classpath and returns a stream of its decompressed content.
 *
 * @param path classpath location of the gzipped database file, e.g. {@code "/GeoLite2-City.mmdb.gz"}
 * @return a stream that decompresses the resource while it is read; the caller is responsible for closing it
 * @throws IOException if the resource cannot be found or does not start with a valid gzip header
 */
static InputStream getDatabaseFileInputStream(String path) throws IOException {
    InputStream compressed = GeoIpProcessor.class.getResourceAsStream(path);
    if (compressed == null) {
        // getResourceAsStream reports a missing resource by returning null; fail with a descriptive error
        // instead of the message-less NullPointerException that GZIPInputStream would raise.
        throw new IOException("database file [" + path + "] not found on the classpath");
    }
    try {
        return new GZIPInputStream(compressed);
    } catch (IOException e) {
        // The GZIPInputStream constructor reads the gzip header eagerly; if that fails, the raw stream
        // must be closed here because no caller will ever receive a handle to it.
        try {
            compressed.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
}

View File

@ -98,7 +98,7 @@
{
"geoip" : {
"field" : "field1",
"database_file" : "GeoLite2-Country.mmdb"
"database_file" : "GeoLite2-Country.mmdb.gz"
}
}
]

View File

@ -67,7 +67,7 @@
- match: { _source.httpversion: "1.1" }
- match: { _source.timestamp: "2014-09-08T02:54:42.000Z" }
- match: { _source.geoip.continent_name: "North America" }
- match: { _source.geoip.city_name: "Charlotte" }
- match: { _source.geoip.city_name: "Fayetteville" }
- match: { _source.geoip.country_iso_code: "US" }
- match: { _source.geoip.region_name: "North Carolina" }