Expose MMapDirectory.preLoad(). #18880

The MMapDirectory has a switch that allows the content of files to be loaded
into the filesystem cache upon opening. This commit exposes it with the new
`index.store.preload` setting.
This commit is contained in:
Adrien Grand 2016-06-15 09:07:18 +02:00
parent 459665914b
commit 93415d4506
6 changed files with 148 additions and 57 deletions

View File

@ -134,6 +134,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING, MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING,
BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING, BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING,
IndexModule.INDEX_STORE_TYPE_SETTING, IndexModule.INDEX_STORE_TYPE_SETTING,
IndexModule.INDEX_STORE_PRE_LOAD_SETTING,
IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING, IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING,
IndexModule.INDEX_QUERY_CACHE_EVERYTHING_SETTING, IndexModule.INDEX_QUERY_CACHE_EVERYTHING_SETTING,
PrimaryShardAllocator.INDEX_RECOVERY_INITIAL_SHARDS_SETTING, PrimaryShardAllocator.INDEX_RECOVERY_INITIAL_SHARDS_SETTING,

View File

@ -45,6 +45,7 @@ import org.elasticsearch.indices.mapper.MapperRegistry;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
@ -74,6 +75,14 @@ public final class IndexModule {
public static final Setting<String> INDEX_STORE_TYPE_SETTING = public static final Setting<String> INDEX_STORE_TYPE_SETTING =
new Setting<>("index.store.type", "", Function.identity(), Property.IndexScope, Property.NodeScope); new Setting<>("index.store.type", "", Function.identity(), Property.IndexScope, Property.NodeScope);
/**
* List of file extensions whose files should be loaded into the file-system
* cache upon opening of files. Registered under the {@code index.store.preload}
* key with an empty list as the default, and usable at both index and node scope.
* {@code FsDirectoryService} treats the special value {@code "*"} as "pre-load
* every file".
* This only works with the mmap directory, and even in that case is still
* best-effort only.
*/
public static final Setting<List<String>> INDEX_STORE_PRE_LOAD_SETTING =
Setting.listSetting("index.store.preload", Collections.emptyList(), Function.identity(),
Property.IndexScope, Property.NodeScope);
public static final String SIMILARITY_SETTINGS_PREFIX = "index.similarity"; public static final String SIMILARITY_SETTINGS_PREFIX = "index.similarity";
// whether to use the query cache // whether to use the query cache

View File

@ -31,13 +31,11 @@ import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.store.SimpleFSLockFactory; import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.store.SleepingLockWrapper; import org.apache.lucene.store.SleepingLockWrapper;
import org.apache.lucene.store.StoreRateLimiting; import org.apache.lucene.store.StoreRateLimiting;
import org.apache.lucene.util.Constants;
import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.metrics.CounterMetric; import org.elasticsearch.common.metrics.CounterMetric;
import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.IndexModule; import org.elasticsearch.index.IndexModule;
import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.shard.ShardPath; import org.elasticsearch.index.shard.ShardPath;
@ -45,7 +43,7 @@ import org.elasticsearch.index.shard.ShardPath;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Collections; import java.util.HashSet;
import java.util.Set; import java.util.Set;
/** /**
@ -87,8 +85,12 @@ public class FsDirectoryService extends DirectoryService implements StoreRateLim
@Override @Override
public Directory newDirectory() throws IOException { public Directory newDirectory() throws IOException {
final Path location = path.resolveIndex(); final Path location = path.resolveIndex();
final LockFactory lockFactory = indexSettings.getValue(INDEX_LOCK_FACTOR_SETTING);
Files.createDirectories(location); Files.createDirectories(location);
Directory wrapped = newFSDirectory(location, indexSettings.getValue(INDEX_LOCK_FACTOR_SETTING)); Directory wrapped = newFSDirectory(location, lockFactory);
Set<String> preLoadExtensions = new HashSet<>(
indexSettings.getValue(IndexModule.INDEX_STORE_PRE_LOAD_SETTING));
wrapped = setPreload(wrapped, location, lockFactory, preLoadExtensions);
if (IndexMetaData.isOnSharedFilesystem(indexSettings.getSettings())) { if (IndexMetaData.isOnSharedFilesystem(indexSettings.getSettings())) {
wrapped = new SleepingLockWrapper(wrapped, 5000); wrapped = new SleepingLockWrapper(wrapped, 5000);
} }
@ -100,25 +102,11 @@ public class FsDirectoryService extends DirectoryService implements StoreRateLim
rateLimitingTimeInNanos.inc(nanos); rateLimitingTimeInNanos.inc(nanos);
} }
/*
* We are mmapping norms, docvalues as well as term dictionaries, all other files are served through NIOFS
* this provides good random access performance while not creating unnecessary mmaps for files like stored
* fields etc.
*/
private static final Set<String> PRIMARY_EXTENSIONS = Collections.unmodifiableSet(Sets.newHashSet("nvd", "dvd", "tim"));
protected Directory newFSDirectory(Path location, LockFactory lockFactory) throws IOException { protected Directory newFSDirectory(Path location, LockFactory lockFactory) throws IOException {
final String storeType = indexSettings.getSettings().get(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), final String storeType = indexSettings.getSettings().get(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(),
IndexModule.Type.FS.getSettingsKey()); IndexModule.Type.FS.getSettingsKey());
if (IndexModule.Type.FS.match(storeType) || isDefault(storeType)) { if (IndexModule.Type.FS.match(storeType) || IndexModule.Type.DEFAULT.match(storeType)) {
final FSDirectory open = FSDirectory.open(location, lockFactory); // use lucene defaults return FSDirectory.open(location, lockFactory); // use lucene defaults
if (open instanceof MMapDirectory
&& isDefault(storeType)
&& Constants.WINDOWS == false) {
return newDefaultDir(location, (MMapDirectory) open, lockFactory);
}
return open;
} else if (IndexModule.Type.SIMPLEFS.match(storeType)) { } else if (IndexModule.Type.SIMPLEFS.match(storeType)) {
return new SimpleFSDirectory(location, lockFactory); return new SimpleFSDirectory(location, lockFactory);
} else if (IndexModule.Type.NIOFS.match(storeType)) { } else if (IndexModule.Type.NIOFS.match(storeType)) {
@ -129,17 +117,25 @@ public class FsDirectoryService extends DirectoryService implements StoreRateLim
throw new IllegalArgumentException("No directory found for type [" + storeType + "]"); throw new IllegalArgumentException("No directory found for type [" + storeType + "]");
} }
private static boolean isDefault(String storeType) { private static Directory setPreload(Directory directory, Path location, LockFactory lockFactory,
return IndexModule.Type.DEFAULT.match(storeType); Set<String> preLoadExtensions) throws IOException {
if (preLoadExtensions.isEmpty() == false
&& directory instanceof MMapDirectory
&& ((MMapDirectory) directory).getPreload() == false) {
if (preLoadExtensions.contains("*")) {
((MMapDirectory) directory).setPreload(true);
return directory;
} }
MMapDirectory primary = new MMapDirectory(location, lockFactory);
private Directory newDefaultDir(Path location, final MMapDirectory mmapDir, LockFactory lockFactory) throws IOException { primary.setPreload(true);
return new FileSwitchDirectory(PRIMARY_EXTENSIONS, mmapDir, new NIOFSDirectory(location, lockFactory), true) { return new FileSwitchDirectory(preLoadExtensions, primary, directory, true) {
@Override @Override
public String[] listAll() throws IOException { public String[] listAll() throws IOException {
// Avoid doing listAll twice: // avoid listing twice
return mmapDir.listAll(); return primary.listAll();
} }
}; };
} }
return directory;
}
} }

View File

@ -19,13 +19,13 @@
package org.elasticsearch.index.store; package org.elasticsearch.index.store;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.FileSwitchDirectory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.RateLimitedFSDirectory; import org.apache.lucene.store.RateLimitedFSDirectory;
import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.store.SleepingLockWrapper; import org.apache.lucene.store.SleepingLockWrapper;
import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexModule; import org.elasticsearch.index.IndexModule;
import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.shard.ShardId;
@ -36,6 +36,7 @@ import org.elasticsearch.test.IndexSettingsModule;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Arrays;
public class FsDirectoryServiceTests extends ESTestCase { public class FsDirectoryServiceTests extends ESTestCase {
@ -73,4 +74,43 @@ public class FsDirectoryServiceTests extends ESTestCase {
assertFalse(delegate instanceof SleepingLockWrapper); assertFalse(delegate instanceof SleepingLockWrapper);
assertTrue(delegate instanceof SimpleFSDirectory); assertTrue(delegate instanceof SimpleFSDirectory);
} }
// Checks the directory produced by FsDirectoryService for the three shapes
// of the pre-load setting: no extensions, an explicit extension list, and
// the "*" wildcard.
public void testPreload() throws IOException {
doTestPreload();
doTestPreload("nvd", "dvd", "tim");
doTestPreload("*");
}
// Builds an mmapfs-backed FsDirectoryService configured with the given
// pre-load extensions and asserts on the kind of Directory it creates.
//
// preload: the extensions to put in the pre-load setting; may be empty or
// contain the "*" wildcard.
private void doTestPreload(String...preload) throws IOException {
// Force the mmapfs store type so the delegate is expected to be an MMapDirectory.
Settings build = Settings.builder()
.put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), "mmapfs")
.putArray(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), preload)
.build();
IndexSettings settings = IndexSettingsModule.newIndexSettings("foo", build);
IndexStoreConfig config = new IndexStoreConfig(settings.getSettings());
IndexStore store = new IndexStore(settings, config);
// Shard directory: <temp dir>/<index uuid>/0
Path tempDir = createTempDir().resolve(settings.getUUID()).resolve("0");
Files.createDirectories(tempDir);
ShardPath path = new ShardPath(false, tempDir, tempDir, new ShardId(settings.getIndex(), 0));
FsDirectoryService fsDirectoryService = new FsDirectoryService(settings, store, path);
Directory directory = fsDirectoryService.newDirectory();
// The outermost directory is the rate-limiting wrapper; unwrap it to
// inspect what the pre-load logic produced.
assertTrue(directory instanceof RateLimitedFSDirectory);
RateLimitedFSDirectory rateLimitingDirectory = (RateLimitedFSDirectory) directory;
Directory delegate = rateLimitingDirectory.getDelegate();
assertFalse(delegate instanceof SleepingLockWrapper);
if (preload.length == 0) {
// No extensions: a plain MMapDirectory with pre-loading left off.
assertTrue(delegate.toString(), delegate instanceof MMapDirectory);
assertFalse(((MMapDirectory) delegate).getPreload());
} else if (Arrays.asList(preload).contains("*")) {
// Wildcard: a single MMapDirectory with pre-loading switched on.
assertTrue(delegate.toString(), delegate instanceof MMapDirectory);
assertTrue(((MMapDirectory) delegate).getPreload());
} else {
// Explicit extensions: a FileSwitchDirectory whose primary directory
// pre-loads and whose secondary directory does not.
assertTrue(delegate.toString(), delegate instanceof FileSwitchDirectory);
FileSwitchDirectory fsd = (FileSwitchDirectory) delegate;
assertTrue(fsd.getPrimaryDir() instanceof MMapDirectory);
assertTrue(((MMapDirectory) fsd.getPrimaryDir()).getPreload());
assertTrue(fsd.getSecondaryDir() instanceof MMapDirectory);
assertFalse(((MMapDirectory) fsd.getSecondaryDir()).getPreload());
}
}
} }

View File

@ -77,6 +77,7 @@ public class IndexStoreTests extends ESTestCase {
assertTrue(type + " " + directory.toString(), directory instanceof SimpleFSDirectory); assertTrue(type + " " + directory.toString(), directory instanceof SimpleFSDirectory);
break; break;
case FS: case FS:
case DEFAULT:
if (Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) { if (Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) {
assertTrue(directory.toString(), directory instanceof MMapDirectory); assertTrue(directory.toString(), directory instanceof MMapDirectory);
} else if (Constants.WINDOWS) { } else if (Constants.WINDOWS) {
@ -85,19 +86,6 @@ public class IndexStoreTests extends ESTestCase {
assertTrue(directory.toString(), directory instanceof NIOFSDirectory); assertTrue(directory.toString(), directory instanceof NIOFSDirectory);
} }
break; break;
case DEFAULT:
if (Constants.WINDOWS) {
if (Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) {
assertTrue(type + " " + directory.toString(), directory instanceof MMapDirectory);
} else {
assertTrue(type + " " + directory.toString(), directory instanceof SimpleFSDirectory);
}
} else if (Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) {
assertTrue(type + " " + directory.toString(), directory instanceof FileSwitchDirectory);
} else {
assertTrue(type + " " + directory.toString(), directory instanceof NIOFSDirectory);
}
break;
default: default:
fail(); fail();
} }

View File

@ -7,9 +7,9 @@ The store module allows you to control how index data is stored and accessed on
[[file-system]] [[file-system]]
=== File system storage types === File system storage types
There are different file system implementations or _storage types_. The best There are different file system implementations or _storage types_. By default,
one for the operating environment will be automatically chosen: `simplefs` on elasticsearch will pick the best implementation based on the operating
Windows 32bit, `niofs` on other 32bit systems and `mmapfs` on 64bit systems. environment.
This can be overridden for all indices by adding this to the This can be overridden for all indices by adding this to the
`config/elasticsearch.yml` file: `config/elasticsearch.yml` file:
@ -36,6 +36,12 @@ experimental[This is an expert-only setting and may be removed in the future]
The following sections lists all the different storage types supported. The following sections lists all the different storage types supported.
`fs`::
Default file system implementation. This will pick the best implementation
depending on the operating environment: `simplefs` on Windows 32bit, `niofs`
on other 32bit systems and `mmapfs` on 64bit systems.
[[simplefs]]`simplefs`:: [[simplefs]]`simplefs`::
The Simple FS type is a straightforward implementation of file system The Simple FS type is a straightforward implementation of file system
@ -60,13 +66,64 @@ process equal to the size of the file being mapped. Before using this
class, be sure you have allowed plenty of class, be sure you have allowed plenty of
<<vm-max-map-count,virtual address space>>. <<vm-max-map-count,virtual address space>>.
[[default_fs]]`default_fs` deprecated[5.0.0, The `default_fs` store type is deprecated - use `mmapfs` instead]:: [[default_fs]]`default_fs` deprecated[5.0.0, The `default_fs` store type is deprecated - use `fs` instead]::
The `default` type is a hybrid of NIO FS and MMapFS, which chooses the best The `default` type is deprecated and is aliased to `fs` for backward
file system for each type of file. Currently only the Lucene term dictionary, compatibility.
doc values and points files are memory mapped to reduce the impact on the
operating system. All other files are opened using Lucene `NIOFSDirectory`.
Address space settings (<<vm-max-map-count>>) might also apply if your term
dictionary are large, if you index many fields that use points (numerics, dates
and ip addresses) or if you have many fields with doc values.
=== Pre-loading data into the file system cache
experimental[This is an expert-only setting and may be removed in the future]
By default, elasticsearch completely relies on the operating system file system
cache for caching I/O operations. It is possible to set `index.store.preload`
in order to tell the operating system to load the content of hot index
files into memory upon opening. This setting accepts a comma-separated list of
file extensions: all files whose extension is in the list will be pre-loaded
upon opening. This can be useful to improve search performance of an index,
especially when the host operating system is restarted, since this causes the
file system cache to be trashed. However note that this may slow down the
opening of indices, as they will only become available after data have been
loaded into physical memory.
This setting is best-effort only and may not work at all depending on the store
type and host operating system.
`index.store.preload` is a static setting that can either be set in the
`config/elasticsearch.yml`:
[source,yaml]
---------------------------------
index.store.preload: ["nvd", "dvd"]
---------------------------------
or in the index settings at index creation time:
[source,js]
---------------------------------
PUT /my_index
{
"settings": {
"index.store.preload": ["nvd", "dvd"]
}
}
---------------------------------
The default value is the empty array, which means that nothing will be loaded
into the file-system cache eagerly. For indices that are actively searched,
you might want to set it to `["nvd", "dvd"]`, which will cause norms and doc
values to be loaded eagerly into physical memory. These are the first two
extensions to look at since elasticsearch performs random access on them.
A wildcard can be used in order to indicate that all files should be preloaded:
`index.store.preload: ["*"]`. Note however that it is generally not useful to
load all files into memory, in particular those for stored fields and term
vectors, so a better option might be to set it to
`["nvd", "dvd", "tim", "doc", "dim"]`, which will preload norms, doc values,
terms dictionaries, postings lists and points, which are the most important
parts of the index for search and aggregations.
Note that this setting can be dangerous on indices that are larger than the size
of the main memory of the host, as it would cause the filesystem cache to be
trashed upon reopens after large merges, which would make indexing and searching
_slower_.