From 4704efaef4b08e1191ac81d35eaf20308dcc1274 Mon Sep 17 00:00:00 2001 From: Tal Levy Date: Tue, 13 Sep 2016 18:12:02 -0700 Subject: [PATCH] [ingest-geoip] do not insert null-valued fields in geoip response update geoip to not include null-valued results from database Originally, the plugin would still insert all the requested fields, but assign null to each one. This fixes that by not writing the fields at all. Makes for a better experience when the null fields conflict with the typical geo_point field mapping. --- docs/plugins/ingest-geoip.asciidoc | 45 ++++++++++++ .../ingest/geoip/GeoIpProcessor.java | 61 ++++++++++++---- .../ingest/geoip/GeoIpProcessorTests.java | 40 +++++++++- .../test/ingest_geoip/20_geoip_processor.yaml | 73 +++++++++++++++++++ 4 files changed, 202 insertions(+), 17 deletions(-) diff --git a/docs/plugins/ingest-geoip.asciidoc b/docs/plugins/ingest-geoip.asciidoc index 1626be6c8e6..ec70a125b65 100644 --- a/docs/plugins/ingest-geoip.asciidoc +++ b/docs/plugins/ingest-geoip.asciidoc @@ -154,3 +154,48 @@ returns this: } -------------------------------------------------- // TESTRESPONSE + + +Not all IP addresses find geo information from the database, When this +occurs, no `target_field` is inserted into the document. + +Here is an example of what documents will be indexed as when information for "93.114.45.13" +cannot be found: + +[source,js] +-------------------------------------------------- +PUT _ingest/pipeline/geoip +{ + "description" : "Add geoip info", + "processors" : [ + { + "geoip" : { + "field" : "ip" + } + } + ] +} +PUT my_index/my_type/my_id?pipeline=geoip +{ + "ip": "93.114.45.13" +} +GET my_index/my_type/my_id +-------------------------------------------------- +// CONSOLE + +Which returns: + +[source,js] +-------------------------------------------------- +{ + "found": true, + "_index": "my_index", + "_type": "my_type", + "_id": "my_id", + "_version": 1, + "_source": { + "ip": "93.114.45.13" + } +} +-------------------------------------------------- +// TESTRESPONSE diff --git a/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java b/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java index 5923e3b690e..95a0b85dba3 100644 --- a/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java +++ b/plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java @@ -97,7 +97,9 @@ public final class GeoIpProcessor extends AbstractProcessor { throw new ElasticsearchParseException("Unsupported database type [" + dbReader.getMetadata().getDatabaseType() + "]", new IllegalStateException()); } - ingestDocument.setFieldValue(targetField, geoData); + if (geoData.isEmpty() == false) { + ingestDocument.setFieldValue(targetField, geoData); + } } @Override @@ -149,28 +151,50 @@ public final class GeoIpProcessor extends AbstractProcessor { geoData.put("ip", NetworkAddress.format(ipAddress)); break; case COUNTRY_ISO_CODE: - geoData.put("country_iso_code", country.getIsoCode()); + String countryIsoCode = country.getIsoCode(); + if (countryIsoCode != null) { + geoData.put("country_iso_code", countryIsoCode); + } break; case COUNTRY_NAME: - geoData.put("country_name", country.getName()); + String countryName = country.getName(); + if (countryName != null) { + geoData.put("country_name", countryName); + } break; case CONTINENT_NAME: - geoData.put("continent_name", continent.getName()); + String continentName = continent.getName(); + if (continentName != null) { + geoData.put("continent_name", continentName); + } break; case REGION_NAME: - geoData.put("region_name", subdivision.getName()); + String subdivisionName = subdivision.getName(); + if (subdivisionName != null) { + geoData.put("region_name", subdivisionName); + } break; case CITY_NAME: - geoData.put("city_name", city.getName()); + String cityName = city.getName(); + if (cityName != null) { + geoData.put("city_name", cityName); + } break; case TIMEZONE: - geoData.put("timezone", location.getTimeZone()); + String locationTimeZone = location.getTimeZone(); + if (locationTimeZone != null) { + geoData.put("timezone", locationTimeZone); + } break; case LOCATION: - Map locationObject = new HashMap<>(); - locationObject.put("lat", location.getLatitude()); - locationObject.put("lon", location.getLongitude()); - geoData.put("location", locationObject); + Double latitude = location.getLatitude(); + Double longitude = location.getLongitude(); + if (latitude != null && longitude != null) { + Map locationObject = new HashMap<>(); + locationObject.put("lat", latitude); + locationObject.put("lon", longitude); + geoData.put("location", locationObject); + } break; } } @@ -202,13 +226,22 @@ public final class GeoIpProcessor extends AbstractProcessor { geoData.put("ip", NetworkAddress.format(ipAddress)); break; case COUNTRY_ISO_CODE: - geoData.put("country_iso_code", country.getIsoCode()); + String countryIsoCode = country.getIsoCode(); + if (countryIsoCode != null) { + geoData.put("country_iso_code", countryIsoCode); + } break; case COUNTRY_NAME: - geoData.put("country_name", country.getName()); + String countryName = country.getName(); + if (countryName != null) { + geoData.put("country_name", countryName); + } break; case CONTINENT_NAME: - geoData.put("continent_name", continent.getName()); + String continentName = continent.getName(); + if (continentName != null) { + geoData.put("continent_name", continentName); + } break; } } diff --git a/plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java b/plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java index f3141c735d2..71bb4f65f85 100644 --- a/plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java +++ b/plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java @@ -33,6 +33,8 @@ import java.util.zip.GZIPInputStream; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; public class GeoIpProcessorTests extends ESTestCase { @@ -63,6 +65,23 @@ public class GeoIpProcessorTests extends ESTestCase { assertThat(geoData.get("location"), equalTo(location)); } + public void testCityWithMissingLocation() throws Exception { + InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz"); + GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field", + new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class)); + + Map document = new HashMap<>(); + document.put("source_field", "93.114.45.13"); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); + + assertThat(ingestDocument.getSourceAndMetadata().get("source_field"), equalTo("93.114.45.13")); + @SuppressWarnings("unchecked") + Map geoData = (Map) ingestDocument.getSourceAndMetadata().get("target_field"); + assertThat(geoData.size(), equalTo(1)); + assertThat(geoData.get("ip"), equalTo("93.114.45.13")); + } + public void testCountry() throws Exception { InputStream database = getDatabaseFileInputStream("/GeoLite2-Country.mmdb.gz"); GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field", @@ -83,6 +102,23 @@ public class GeoIpProcessorTests extends ESTestCase { assertThat(geoData.get("continent_name"), equalTo("Europe")); } + public void testCountryWithMissingLocation() throws Exception { + InputStream database = getDatabaseFileInputStream("/GeoLite2-Country.mmdb.gz"); + GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field", + new DatabaseReader.Builder(database).build(), "target_field", EnumSet.allOf(GeoIpProcessor.Property.class)); + + Map document = new HashMap<>(); + document.put("source_field", "93.114.45.13"); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); + + assertThat(ingestDocument.getSourceAndMetadata().get("source_field"), equalTo("93.114.45.13")); + @SuppressWarnings("unchecked") + Map geoData = (Map) ingestDocument.getSourceAndMetadata().get("target_field"); + assertThat(geoData.size(), equalTo(1)); + assertThat(geoData.get("ip"), equalTo("93.114.45.13")); + } + public void testAddressIsNotInTheDatabase() throws Exception { InputStream database = getDatabaseFileInputStream("/GeoLite2-City.mmdb.gz"); GeoIpProcessor processor = new GeoIpProcessor(randomAsciiOfLength(10), "source_field", @@ -92,9 +128,7 @@ public class GeoIpProcessorTests extends ESTestCase { document.put("source_field", "127.0.0.1"); IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); processor.execute(ingestDocument); - @SuppressWarnings("unchecked") - Map geoData = (Map) ingestDocument.getSourceAndMetadata().get("target_field"); - assertThat(geoData.size(), equalTo(0)); + assertThat(ingestDocument.getSourceAndMetadata().containsKey("target_field"), is(false)); } /** Don't silently do DNS lookups or anything trappy on bogus data */ diff --git a/plugins/ingest-geoip/src/test/resources/rest-api-spec/test/ingest_geoip/20_geoip_processor.yaml b/plugins/ingest-geoip/src/test/resources/rest-api-spec/test/ingest_geoip/20_geoip_processor.yaml index 33e9ec1ca46..f662f34ab57 100644 --- a/plugins/ingest-geoip/src/test/resources/rest-api-spec/test/ingest_geoip/20_geoip_processor.yaml +++ b/plugins/ingest-geoip/src/test/resources/rest-api-spec/test/ingest_geoip/20_geoip_processor.yaml @@ -122,3 +122,76 @@ - length: { _source.geoip: 2 } - match: { _source.geoip.country_iso_code: "US" } - match: { _source.geoip.continent_name: "North America" } + +--- +"Test geoip processor with geopoint mapping (both missing and including location)": + - do: + indices.create: + index: test + body: > + { + "mappings" : { + "test" : { + "properties" : { + "geoip.location" : { + "type": "geo_point" + } + } + } + } + } + - match: { acknowledged: true } + + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "geoip" : { + "field" : "field1" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + type: test + id: 1 + pipeline: "my_pipeline" + body: { field1: "93.114.45.13" } + + - do: + get: + index: test + type: test + id: 1 + - match: { _source.field1: "93.114.45.13" } + - is_false: _source.geoip + + - do: + index: + index: test + type: test + id: 2 + pipeline: "my_pipeline" + body: { field1: "128.101.101.101" } + + - do: + get: + index: test + type: test + id: 2 + - match: { _source.field1: "128.101.101.101" } + - length: { _source.geoip: 5 } + - match: { _source.geoip.city_name: "Minneapolis" } + - match: { _source.geoip.country_iso_code: "US" } + - match: { _source.geoip.location.lon: -93.2166 } + - match: { _source.geoip.location.lat: 44.9759 } + - match: { _source.geoip.region_name: "Minnesota" } + - match: { _source.geoip.continent_name: "North America" }