[core] add best_compression option for Lucene 5.0
Upgrades Lucene to the latest snapshot and exposes the BEST_COMPRESSION mode now supported (with backwards compatibility, etc.) in Lucene. This option uses deflate, tuned for highly compressible data.

index.codec::
    The `default` value compresses stored data with LZ4 compression, but this
    can be set to `best_compression` for a higher compression ratio, at the
    expense of slower stored fields performance.

IMO it's safest to implement this as a named codec here: ES already has logic to handle named codecs correctly, and it's unrealistic to expose a plethora of options on Lucene's default codec. We are practically limited in Lucene to what we can support with backwards compatibility, so I don't think we should overengineer this and add additional unnecessary plumbing.

See also:
https://issues.apache.org/jira/browse/LUCENE-5914
https://issues.apache.org/jira/browse/LUCENE-6089
https://issues.apache.org/jira/browse/LUCENE-6090
https://issues.apache.org/jira/browse/LUCENE-6100

Closes #8863
parent aa644e3ad7
commit a2ffe494ae
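A minimal usage sketch of the new option, assuming the 1.x-era Java client API (the index name, Client instance, and helper class are illustrative, not part of this commit):

import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;

// Illustrative helper: create an index whose stored fields use the
// deflate-based codec. "logs" and the Client instance are made up for
// the demo; "index.codec" accepts "default" (LZ4) or "best_compression".
public final class CreateCompressedIndex {
    static void create(Client client) {
        client.admin().indices().prepareCreate("logs")
                .setSettings(ImmutableSettings.settingsBuilder()
                        .put("index.codec", "best_compression"))
                .get();
    }
}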
docs/reference/index-modules.asciidoc

@@ -41,6 +41,11 @@ otherwise it is written in non-compound format.
 refresh operation will be executed. Defaults to `1s`. Can be set to `-1`
 in order to disable it.
 
+`index.codec`::
+The `default` value compresses stored data with LZ4 compression, but
+this can be set to `best_compression` for a higher compression ratio,
+at the expense of slower stored fields performance.
+
 `index.shard.check_on_startup`::
 Should shard consistency be checked upon opening.
 When `true`, the shard will be checked, preventing it from being open in
pom.xml

@@ -32,7 +32,7 @@
    <properties>
        <lucene.version>5.0.0</lucene.version>
-       <lucene.maven.version>5.0.0-snapshot-1642891</lucene.maven.version>
+       <lucene.maven.version>5.0.0-snapshot-1644303</lucene.maven.version>
        <tests.jvms>auto</tests.jvms>
        <tests.shuffle>true</tests.shuffle>
        <tests.output>onerror</tests.output>
@@ -54,7 +54,7 @@
        </repository>
        <repository>
            <id>Lucene snapshots</id>
-           <url>https://download.elasticsearch.org/lucenesnapshots/1642891</url>
+           <url>https://download.elasticsearch.org/lucenesnapshots/1644303</url>
        </repository>
    </repositories>
src/main/java/org/elasticsearch/index/codec/CodecService.java

@@ -20,7 +20,11 @@
 package org.elasticsearch.index.codec;
 
 import com.google.common.collect.ImmutableMap;
 
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene50.Lucene50Codec;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.common.collect.MapBuilder;
 import org.elasticsearch.common.inject.Inject;
@@ -50,6 +54,7 @@ public class CodecService extends AbstractIndexComponent {
     private final ImmutableMap<String, Codec> codecs;
 
     public final static String DEFAULT_CODEC = "default";
+    public final static String BEST_COMPRESSION_CODEC = "best_compression";
 
     public CodecService(Index index) {
         this(index, ImmutableSettings.Builder.EMPTY_SETTINGS);
@@ -68,9 +73,17 @@ public class CodecService extends AbstractIndexComponent {
         this.mapperService = mapperService;
         MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
         if (mapperService == null) {
-            codecs.put(DEFAULT_CODEC, Codec.getDefault());
+            codecs.put(DEFAULT_CODEC, new Lucene50Codec());
+            codecs.put(BEST_COMPRESSION_CODEC, new Lucene50Codec(Mode.BEST_COMPRESSION));
         } else {
-            codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(mapperService,
+            codecs.put(DEFAULT_CODEC,
+                    new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED,
+                            mapperService,
                             postingsFormatService.get(PostingsFormatService.DEFAULT_FORMAT).get(),
                             docValuesFormatService.get(DocValuesFormatService.DEFAULT_FORMAT).get(), logger));
+            codecs.put(BEST_COMPRESSION_CODEC,
+                    new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION,
+                            mapperService,
+                            postingsFormatService.get(PostingsFormatService.DEFAULT_FORMAT).get(),
+                            docValuesFormatService.get(DocValuesFormatService.DEFAULT_FORMAT).get(), logger));
         }
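Because codecs are registered in a map by name, consumers only ever need a lookup. A rough sketch of the kind of wiring the engine does with this map; the helper class and settings lookup below are illustrative, not this commit's actual engine code:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.IndexWriterConfig;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.codec.CodecService;

// Illustrative wiring: resolve the configured codec by name and hand it
// to Lucene. CodecService#codec(String) throws on unknown names.
final class CodecWiring {
    static IndexWriterConfig configFor(CodecService codecService, Settings indexSettings, Analyzer analyzer) {
        String name = indexSettings.get("index.codec", CodecService.DEFAULT_CODEC);
        Codec codec = codecService.codec(name);
        return new IndexWriterConfig(analyzer).setCodec(codec);
    }
}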
src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java

@@ -23,6 +23,7 @@ import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50Codec;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
@@ -49,7 +50,8 @@ public class PerFieldMappingPostingFormatCodec extends Lucene50Codec {
         assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMappingPostingFormatCodec.class) : "PerFieldMappingPostingFormatCodec must subclass the latest lucene codec: " + Lucene.LATEST_CODEC;
     }
 
-    public PerFieldMappingPostingFormatCodec(MapperService mapperService, PostingsFormat defaultPostingFormat, DocValuesFormat defaultDocValuesFormat, ESLogger logger) {
+    public PerFieldMappingPostingFormatCodec(Lucene50StoredFieldsFormat.Mode compressionMode, MapperService mapperService, PostingsFormat defaultPostingFormat, DocValuesFormat defaultDocValuesFormat, ESLogger logger) {
+        super(compressionMode);
         this.mapperService = mapperService;
         this.logger = logger;
         this.defaultPostingFormat = defaultPostingFormat;
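For context, everything the new constructor argument does is funneled through super(compressionMode) into Lucene's stored fields format selection. A self-contained sketch against the plain Lucene 5.0 API, no ES involved (the demo class and field names are made up):

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

// Demo: pick the deflate-backed stored fields format instead of the
// default LZ4 one by passing Mode.BEST_COMPRESSION to the codec.
public final class BestCompressionDemo {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new RAMDirectory()) {
            IndexWriterConfig iwc = new IndexWriterConfig(new WhitespaceAnalyzer());
            iwc.setCodec(new Lucene50Codec(Mode.BEST_COMPRESSION));
            try (IndexWriter iw = new IndexWriter(dir, iwc)) {
                Document doc = new Document();
                doc.add(new StoredField("body", "highly compressible stored data"));
                iw.addDocument(doc);
            }
        }
    }
}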
src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.index.engine.internal;
 
 import com.google.common.collect.Lists;
+
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.search.*;
@@ -69,6 +70,7 @@ import org.elasticsearch.threadpool.ThreadPool;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.lang.reflect.Method;
 import java.util.*;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -1054,10 +1056,13 @@ public class InternalEngine implements Engine {
         }
     }
 
+    // TODO: can we please remove this method?!
     private void waitForMerges(boolean flushAfter) {
         try {
-            currentIndexWriter().waitForMerges();
-        } catch (IOException e) {
+            Method method = IndexWriter.class.getDeclaredMethod("waitForMerges");
+            method.setAccessible(true);
+            method.invoke(currentIndexWriter());
+        } catch (ReflectiveOperationException e) {
             throw new OptimizeFailedEngineException(shardId, e);
         }
         if (flushAfter) {
src/test/java/org/elasticsearch/index/codec/CodecTests.java

@@ -33,7 +33,15 @@ import org.apache.lucene.codecs.lucene46.Lucene46Codec;
 import org.apache.lucene.codecs.lucene49.Lucene49Codec;
 import org.apache.lucene.codecs.lucene50.Lucene50Codec;
 import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
 import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.store.Directory;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
@@ -174,6 +182,34 @@ public class CodecTests extends ElasticsearchSingleNodeLuceneTestCase {
         assertThat(documentMapper.rootMapper(VersionFieldMapper.class).docValuesFormatProvider().get(), instanceOf(Lucene410DocValuesFormat.class));
     }
 
+    public void testDefault() throws Exception {
+        Codec codec = createCodecService().codec("default");
+        assertCompressionEquals(Mode.BEST_SPEED, codec);
+    }
+
+    public void testBestCompression() throws Exception {
+        Codec codec = createCodecService().codec("best_compression");
+        assertCompressionEquals(Mode.BEST_COMPRESSION, codec);
+    }
+
+    // write some docs with it, inspect .si to see this was the used compression
+    private void assertCompressionEquals(Mode expected, Codec actual) throws Exception {
+        Directory dir = newDirectory();
+        IndexWriterConfig iwc = newIndexWriterConfig(null);
+        iwc.setCodec(actual);
+        IndexWriter iw = new IndexWriter(dir, iwc);
+        iw.addDocument(new Document());
+        iw.commit();
+        iw.close();
+        DirectoryReader ir = DirectoryReader.open(dir);
+        SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader();
+        String v = sr.getSegmentInfo().info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY);
+        assertNotNull(v);
+        assertEquals(expected, Mode.valueOf(v));
+        ir.close();
+        dir.close();
+    }
+
     private static CodecService createCodecService() {
         return createCodecService(ImmutableSettings.Builder.EMPTY_SETTINGS);
     }
src/test/java/org/elasticsearch/index/store/StoreTest.java

@@ -187,6 +187,7 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
         IOUtils.close(verifyingOutput, dir);
     }
 
+    // TODO: remove this, its too fragile. just use a static old index instead.
     private static final class OldSIMockingCodec extends FilterCodec {
 
         protected OldSIMockingCodec() {
@@ -232,6 +233,7 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
                     }
                 }
                 output.writeStringSet(files);
+                output.writeStringStringMap(si.getAttributes());
                 CodecUtil.writeFooter(output);
                 success = true;
             } finally {
@@ -245,6 +247,7 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
         }
     }
 
+    // IF THIS TEST FAILS ON UPGRADE GO LOOK AT THE OldSIMockingCodec!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     @Test
     public void testWriteLegacyChecksums() throws IOException {
         final ShardId shardId = new ShardId(new Index("index"), 1);
src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java

@@ -315,6 +315,8 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
         randomSettingsBuilder.put(SETTING_NUMBER_OF_SHARDS, numberOfShards())
                 .put(SETTING_NUMBER_OF_REPLICAS, numberOfReplicas());
 
+        randomSettingsBuilder.put("index.codec", randomFrom("default", "best_compression"));
+
         XContentBuilder mappings = null;
         if (frequently() && randomDynamicTemplates()) {
             mappings = XContentFactory.jsonBuilder().startObject().startObject("_default_");