Move ingest-geoip default databases out of config (#36949)

Today the default databases bundled with ingest-geoip are treated as
configuration files that we unbundle into the Elasticsearch
configuration directory. This can cause problems for users using our
Docker images if they bind mount over the configuration
directory. Additionally, it creates complexity when trying to convert
ingest-geoip to a module. This commit moves these databases out of the
configuration directory and instead loads from the plugin
directory. Further, custom databases can still be put into the
configuration directory.
This commit is contained in:
Jason Tedor 2018-12-21 18:05:50 -05:00 committed by GitHub
parent 25625d2407
commit 6781a29f9b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 176 additions and 56 deletions

View File

@ -46,7 +46,7 @@ compileTestJava.options.compilerArgs << "-Xlint:-rawtypes,-unchecked"
bundlePlugin { bundlePlugin {
from("${project.buildDir}/ingest-geoip") { from("${project.buildDir}/ingest-geoip") {
into 'config/' into '/'
} }
} }

View File

@ -28,6 +28,7 @@ import org.elasticsearch.common.Booleans;
import org.elasticsearch.common.SuppressForbidden; import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.cache.Cache; import org.elasticsearch.common.cache.Cache;
import org.elasticsearch.common.cache.CacheBuilder; import org.elasticsearch.common.cache.CacheBuilder;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.ingest.Processor; import org.elasticsearch.ingest.Processor;
@ -54,6 +55,8 @@ public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable
public static final Setting<Long> CACHE_SIZE = public static final Setting<Long> CACHE_SIZE =
Setting.longSetting("ingest.geoip.cache_size", 1000, 0, Setting.Property.NodeScope); Setting.longSetting("ingest.geoip.cache_size", 1000, 0, Setting.Property.NodeScope);
static String[] DEFAULT_DATABASE_FILENAMES = new String[]{"GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb"};
private Map<String, DatabaseReaderLazyLoader> databaseReaders; private Map<String, DatabaseReaderLazyLoader> databaseReaders;
@Override @Override
@ -66,48 +69,89 @@ public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable
if (databaseReaders != null) { if (databaseReaders != null) {
throw new IllegalStateException("getProcessors called twice for geoip plugin!!"); throw new IllegalStateException("getProcessors called twice for geoip plugin!!");
} }
Path geoIpConfigDirectory = parameters.env.configFile().resolve("ingest-geoip"); final Path geoIpDirectory = getGeoIpDirectory(parameters);
final Path geoIpConfigDirectory = parameters.env.configFile().resolve("ingest-geoip");
long cacheSize = CACHE_SIZE.get(parameters.env.settings()); long cacheSize = CACHE_SIZE.get(parameters.env.settings());
try { try {
databaseReaders = loadDatabaseReaders(geoIpConfigDirectory); databaseReaders = loadDatabaseReaders(geoIpDirectory, geoIpConfigDirectory);
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
return Collections.singletonMap(GeoIpProcessor.TYPE, new GeoIpProcessor.Factory(databaseReaders, new GeoIpCache(cacheSize))); return Collections.singletonMap(GeoIpProcessor.TYPE, new GeoIpProcessor.Factory(databaseReaders, new GeoIpCache(cacheSize)));
} }
static Map<String, DatabaseReaderLazyLoader> loadDatabaseReaders(Path geoIpConfigDirectory) throws IOException { /*
if (Files.exists(geoIpConfigDirectory) == false && Files.isDirectory(geoIpConfigDirectory)) { * In GeoIpProcessorNonIngestNodeTests, ingest-geoip is loaded on the classpath. This means that the plugin is never unbundled into a
throw new IllegalStateException("the geoip directory [" + geoIpConfigDirectory + "] containing databases doesn't exist"); * directory where the database files would live. Therefore, we have to copy these database files ourselves. To do this, we need the
* ability to specify where those database files would go. We do this by adding a plugin that registers ingest.geoip.database_path as
* an actual setting. Otherwise, in production code, this setting is not registered and the database path is not configurable.
*/
@SuppressForbidden(reason = "PathUtils#get")
private Path getGeoIpDirectory(Processor.Parameters parameters) {
final Path geoIpDirectory;
if (parameters.env.settings().get("ingest.geoip.database_path") == null) {
geoIpDirectory = parameters.env.pluginsFile().resolve("ingest-geoip");
} else {
geoIpDirectory = PathUtils.get(parameters.env.settings().get("ingest.geoip.database_path"));
} }
boolean loadDatabaseOnHeap = Booleans.parseBoolean(System.getProperty("es.geoip.load_db_on_heap", "false")); return geoIpDirectory;
Map<String, DatabaseReaderLazyLoader> databaseReaders = new HashMap<>(); }
try (Stream<Path> databaseFiles = Files.list(geoIpConfigDirectory)) {
PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb"); static Map<String, DatabaseReaderLazyLoader> loadDatabaseReaders(Path geoIpDirectory, Path geoIpConfigDirectory) throws IOException {
// Use iterator instead of forEach otherwise IOException needs to be caught twice... assertDatabaseExistence(geoIpDirectory, true);
Iterator<Path> iterator = databaseFiles.iterator(); assertDatabaseExistence(geoIpConfigDirectory, false);
while (iterator.hasNext()) { final boolean loadDatabaseOnHeap = Booleans.parseBoolean(System.getProperty("es.geoip.load_db_on_heap", "false"));
Path databasePath = iterator.next(); final Map<String, DatabaseReaderLazyLoader> databaseReaders = new HashMap<>();
if (Files.isRegularFile(databasePath) && pathMatcher.matches(databasePath)) {
String databaseFileName = databasePath.getFileName().toString(); // load the default databases
DatabaseReaderLazyLoader holder = new DatabaseReaderLazyLoader( for (final String databaseFilename : DEFAULT_DATABASE_FILENAMES) {
databasePath, final Path databasePath = geoIpDirectory.resolve(databaseFilename);
() -> { final DatabaseReaderLazyLoader loader = createLoader(databasePath, loadDatabaseOnHeap);
DatabaseReader.Builder builder = createDatabaseBuilder(databasePath).withCache(NoCache.getInstance()); databaseReaders.put(databaseFilename, loader);
if (loadDatabaseOnHeap) { }
builder.fileMode(Reader.FileMode.MEMORY);
} else { // load any custom databases
builder.fileMode(Reader.FileMode.MEMORY_MAPPED); if (Files.exists(geoIpConfigDirectory)) {
} try (Stream<Path> databaseFiles = Files.list(geoIpConfigDirectory)) {
return builder.build(); PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb");
}); // Use iterator instead of forEach otherwise IOException needs to be caught twice...
databaseReaders.put(databaseFileName, holder); Iterator<Path> iterator = databaseFiles.iterator();
while (iterator.hasNext()) {
Path databasePath = iterator.next();
if (Files.isRegularFile(databasePath) && pathMatcher.matches(databasePath)) {
String databaseFileName = databasePath.getFileName().toString();
final DatabaseReaderLazyLoader loader = createLoader(databasePath, loadDatabaseOnHeap);
databaseReaders.put(databaseFileName, loader);
}
} }
} }
} }
return Collections.unmodifiableMap(databaseReaders); return Collections.unmodifiableMap(databaseReaders);
} }
private static DatabaseReaderLazyLoader createLoader(Path databasePath, boolean loadDatabaseOnHeap) {
return new DatabaseReaderLazyLoader(
databasePath,
() -> {
DatabaseReader.Builder builder = createDatabaseBuilder(databasePath).withCache(NoCache.getInstance());
if (loadDatabaseOnHeap) {
builder.fileMode(Reader.FileMode.MEMORY);
} else {
builder.fileMode(Reader.FileMode.MEMORY_MAPPED);
}
return builder.build();
});
}
private static void assertDatabaseExistence(final Path path, final boolean exists) throws IOException {
for (final String database : DEFAULT_DATABASE_FILENAMES) {
if (Files.exists(path.resolve(database)) != exists) {
final String message = "expected database [" + database + "] to " + (exists ? "" : "not ") + "exist in [" + path + "]";
throw new IOException(message);
}
}
}
@SuppressForbidden(reason = "Maxmind API requires java.io.File") @SuppressForbidden(reason = "Maxmind API requires java.io.File")
private static DatabaseReader.Builder createDatabaseBuilder(Path databasePath) { private static DatabaseReader.Builder createDatabaseBuilder(Path databasePath) {
return new DatabaseReader.Builder(databasePath.toFile()); return new DatabaseReader.Builder(databasePath.toFile());

View File

@ -44,7 +44,9 @@ import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasToString;
import static org.hamcrest.Matchers.sameInstance; import static org.hamcrest.Matchers.sameInstance;
public class GeoIpProcessorFactoryTests extends ESTestCase { public class GeoIpProcessorFactoryTests extends ESTestCase {
@ -60,17 +62,13 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
return; return;
} }
Path configDir = createTempDir(); final Path geoIpDir = createTempDir();
Path geoIpConfigDir = configDir.resolve("ingest-geoip"); final Path configDir = createTempDir();
final Path geoIpConfigDir = configDir.resolve("ingest-geoip");
Files.createDirectories(geoIpConfigDir); Files.createDirectories(geoIpConfigDir);
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")), copyDatabaseFiles(geoIpDir);
geoIpConfigDir.resolve("GeoLite2-City.mmdb"));
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb")),
geoIpConfigDir.resolve("GeoLite2-Country.mmdb"));
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-ASN.mmdb")),
geoIpConfigDir.resolve("GeoLite2-ASN.mmdb"));
databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir); databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpDir, geoIpConfigDir);
} }
@AfterClass @AfterClass
@ -297,21 +295,16 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
// This test uses a MappedByteBuffer which will keep the file mappings active until it is garbage-collected. // This test uses a MappedByteBuffer which will keep the file mappings active until it is garbage-collected.
// As a consequence, the corresponding file appears to be still in use and Windows cannot delete it. // As a consequence, the corresponding file appears to be still in use and Windows cannot delete it.
assumeFalse("windows deletion behavior is asinine", Constants.WINDOWS); assumeFalse("windows deletion behavior is asinine", Constants.WINDOWS);
Path configDir = createTempDir(); final Path geoIpDir = createTempDir();
Path geoIpConfigDir = configDir.resolve("ingest-geoip"); final Path configDir = createTempDir();
final Path geoIpConfigDir = configDir.resolve("ingest-geoip");
Files.createDirectories(geoIpConfigDir); Files.createDirectories(geoIpConfigDir);
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")), copyDatabaseFiles(geoIpDir);
geoIpConfigDir.resolve("GeoLite2-City.mmdb"));
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb")),
geoIpConfigDir.resolve("GeoLite2-Country.mmdb"));
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-ASN.mmdb")),
geoIpConfigDir.resolve("GeoLite2-ASN.mmdb"));
// Loading another database reader instances, because otherwise we can't test lazy loading as the // Loading another database reader instances, because otherwise we can't test lazy loading as the
// database readers used at class level are reused between tests. (we want to keep that otherwise running this // database readers used at class level are reused between tests. (we want to keep that otherwise running this
// test will take roughly 4 times more time) // test will take roughly 4 times more time)
Map<String, DatabaseReaderLazyLoader> databaseReaders = Map<String, DatabaseReaderLazyLoader> databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpDir, geoIpConfigDir);
IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir);
GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders, new GeoIpCache(1000)); GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders, new GeoIpCache(1000));
for (DatabaseReaderLazyLoader lazyLoader : databaseReaders.values()) { for (DatabaseReaderLazyLoader lazyLoader : databaseReaders.values()) {
assertNull(lazyLoader.databaseReader.get()); assertNull(lazyLoader.databaseReader.get());
@ -354,4 +347,79 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
assertNotNull(databaseReaders.get("GeoLite2-ASN.mmdb").databaseReader.get()); assertNotNull(databaseReaders.get("GeoLite2-ASN.mmdb").databaseReader.get());
} }
public void testLoadingCustomDatabase() throws IOException {
final Path geoIpDir = createTempDir();
final Path configDir = createTempDir();
final Path geoIpConfigDir = configDir.resolve("ingest-geoip");
Files.createDirectories(geoIpConfigDir);
copyDatabaseFiles(geoIpDir);
// fake the GeoIP2-City database
copyDatabaseFile(geoIpConfigDir, "GeoLite2-City.mmdb");
Files.move(geoIpConfigDir.resolve("GeoLite2-City.mmdb"), geoIpConfigDir.resolve("GeoIP2-City.mmdb"));
/*
* Loading another database reader instances, because otherwise we can't test lazy loading as the database readers used at class
* level are reused between tests. (we want to keep that otherwise running this test will take roughly 4 times more time).
*/
final Map<String, DatabaseReaderLazyLoader> databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpDir, geoIpConfigDir);
final GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders, new GeoIpCache(1000));
for (DatabaseReaderLazyLoader lazyLoader : databaseReaders.values()) {
assertNull(lazyLoader.databaseReader.get());
}
final Map<String, Object> field = Collections.singletonMap("_field", "1.1.1.1");
final IngestDocument document = new IngestDocument("index", "type", "id", "routing", 1L, VersionType.EXTERNAL, field);
Map<String, Object> config = new HashMap<>();
config.put("field", "_field");
config.put("database_file", "GeoIP2-City.mmdb");
final GeoIpProcessor city = factory.create(null, "_tag", config);
// these are lazy loaded until first use so we expect null here
assertNull(databaseReaders.get("GeoIP2-City.mmdb").databaseReader.get());
city.execute(document);
// the first ingest should trigger a database load
assertNotNull(databaseReaders.get("GeoIP2-City.mmdb").databaseReader.get());
}
public void testDatabaseNotExistsInDir() throws IOException {
final Path geoIpDir = createTempDir();
final Path configDir = createTempDir();
final Path geoIpConfigDir = configDir.resolve("ingest-geoip");
if (randomBoolean()) {
Files.createDirectories(geoIpConfigDir);
}
copyDatabaseFiles(geoIpDir);
final String databaseFilename = randomFrom(IngestGeoIpPlugin.DEFAULT_DATABASE_FILENAMES);
Files.delete(geoIpDir.resolve(databaseFilename));
final IOException e =
expectThrows(IOException.class, () -> IngestGeoIpPlugin.loadDatabaseReaders(geoIpDir, geoIpConfigDir));
assertThat(e, hasToString(containsString("expected database [" + databaseFilename + "] to exist in [" + geoIpDir + "]")));
}
public void testDatabaseExistsInConfigDir() throws IOException {
final Path geoIpDir = createTempDir();
final Path configDir = createTempDir();
final Path geoIpConfigDir = configDir.resolve("ingest-geoip");
Files.createDirectories(geoIpConfigDir);
copyDatabaseFiles(geoIpDir);
final String databaseFilename = randomFrom(IngestGeoIpPlugin.DEFAULT_DATABASE_FILENAMES);
copyDatabaseFile(geoIpConfigDir, databaseFilename);
final IOException e =
expectThrows(IOException.class, () -> IngestGeoIpPlugin.loadDatabaseReaders(geoIpDir, geoIpConfigDir));
assertThat(e, hasToString(containsString("expected database [" + databaseFilename + "] to not exist in [" + geoIpConfigDir + "]")));
}
private static void copyDatabaseFile(final Path path, final String databaseFilename) throws IOException {
Files.copy(
new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/" + databaseFilename)),
path.resolve(databaseFilename));
}
private static void copyDatabaseFiles(final Path path) throws IOException {
for (final String databaseFilename : IngestGeoIpPlugin.DEFAULT_DATABASE_FILENAMES) {
copyDatabaseFile(path, databaseFilename);
}
}
} }

View File

@ -23,6 +23,7 @@ import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.ingest.PutPipelineRequest; import org.elasticsearch.action.ingest.PutPipelineRequest;
import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.common.xcontent.XContentType;
@ -41,27 +42,30 @@ import java.nio.file.Path;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.List;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
public class GeoIpProcessorNonIngestNodeTests extends ESIntegTestCase { public class GeoIpProcessorNonIngestNodeTests extends ESIntegTestCase {
public static class IngestGeoIpSettingsPlugin extends Plugin {
@Override
public List<Setting<?>> getSettings() {
return Collections.singletonList(Setting.simpleString("ingest.geoip.database_path", Setting.Property.NodeScope));
}
}
@Override @Override
protected Collection<Class<? extends Plugin>> nodePlugins() { protected Collection<Class<? extends Plugin>> nodePlugins() {
return Collections.singleton(IngestGeoIpPlugin.class); return Arrays.asList(IngestGeoIpPlugin.class, IngestGeoIpSettingsPlugin.class);
} }
@Override @Override
protected Settings nodeSettings(final int nodeOrdinal) { protected Settings nodeSettings(final int nodeOrdinal) {
return Settings.builder().put("node.ingest", false).put(super.nodeSettings(nodeOrdinal)).build(); final Path databasePath = createTempDir();
}
@Override
protected Path nodeConfigPath(final int nodeOrdinal) {
final Path configPath = createTempDir();
try { try {
final Path databasePath = configPath.resolve("ingest-geoip");
Files.createDirectories(databasePath); Files.createDirectories(databasePath);
Files.copy( Files.copy(
new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")), new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")),
@ -75,7 +79,11 @@ public class GeoIpProcessorNonIngestNodeTests extends ESIntegTestCase {
} catch (final IOException e) { } catch (final IOException e) {
throw new UncheckedIOException(e); throw new UncheckedIOException(e);
} }
return configPath; return Settings.builder()
.put("ingest.geoip.database_path", databasePath)
.put("node.ingest", false)
.put(super.nodeSettings(nodeOrdinal))
.build();
} }
/** /**