[core] add best_compression option for Lucene 5.0
Upgrades Lucene to the latest snapshot, and adds support for the BEST_COMPRESSION parameter now supported (with backwards compatibility, etc.) in Lucene. This option uses deflate, tuned for highly compressible data.

index.codec::
The `default` value compresses stored data with LZ4 compression, but
this can be set to `best_compression` for a higher compression ratio,
at the expense of slower stored fields performance.

IMO it's safest to implement this as a named codec here, because ES already has logic to handle named codecs correctly, and because it's unrealistic to expose a plethora of options on Lucene's default codec... we are practically limited in Lucene to what we can support with backwards compatibility, so I don't think we should overengineer this and add additional unnecessary plumbing.

See also:
https://issues.apache.org/jira/browse/LUCENE-5914
https://issues.apache.org/jira/browse/LUCENE-6089
https://issues.apache.org/jira/browse/LUCENE-6090
https://issues.apache.org/jira/browse/LUCENE-6100

Closes #8863
parent aa644e3ad7
commit a2ffe494ae
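As a usage illustration (not part of the commit): with this change an index can opt into the new codec at creation time. A minimal sketch against the Java client API of this era; the index name and the client wiring are hypothetical:

    import org.elasticsearch.client.Client;
    import org.elasticsearch.common.settings.ImmutableSettings;

    public class BestCompressionExample {
        // Creates an index whose stored fields are compressed with deflate
        // (best_compression) instead of the default LZ4. "logs" is a
        // hypothetical index name.
        public static void createCompressedIndex(Client client) {
            client.admin().indices().prepareCreate("logs")
                    .setSettings(ImmutableSettings.settingsBuilder()
                            .put("index.codec", "best_compression")
                            .build())
                    .execute().actionGet();
        }
    }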
@@ -41,6 +41,11 @@ otherwise it is written in non-compound format.
 refresh operation will be executed. Defaults to `1s`. Can be set to `-1`
 in order to disable it.
 
+`index.codec`::
+The `default` value compresses stored data with LZ4 compression, but
+this can be set to `best_compression` for a higher compression ratio,
+at the expense of slower stored fields performance.
+
 `index.shard.check_on_startup`::
 Should shard consistency be checked upon opening.
 When `true`, the shard will be checked, preventing it from being open in
pom.xml
@@ -32,7 +32,7 @@
 
     <properties>
         <lucene.version>5.0.0</lucene.version>
-        <lucene.maven.version>5.0.0-snapshot-1642891</lucene.maven.version>
+        <lucene.maven.version>5.0.0-snapshot-1644303</lucene.maven.version>
         <tests.jvms>auto</tests.jvms>
         <tests.shuffle>true</tests.shuffle>
         <tests.output>onerror</tests.output>
@@ -54,7 +54,7 @@
         </repository>
         <repository>
             <id>Lucene snapshots</id>
-            <url>https://download.elasticsearch.org/lucenesnapshots/1642891</url>
+            <url>https://download.elasticsearch.org/lucenesnapshots/1644303</url>
         </repository>
     </repositories>
 
CodecService.java
@@ -20,7 +20,11 @@
 package org.elasticsearch.index.codec;
 
 import com.google.common.collect.ImmutableMap;
+
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene50.Lucene50Codec;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
 import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.common.collect.MapBuilder;
 import org.elasticsearch.common.inject.Inject;
@@ -50,6 +54,7 @@ public class CodecService extends AbstractIndexComponent {
     private final ImmutableMap<String, Codec> codecs;
 
     public final static String DEFAULT_CODEC = "default";
+    public final static String BEST_COMPRESSION_CODEC = "best_compression";
 
     public CodecService(Index index) {
         this(index, ImmutableSettings.Builder.EMPTY_SETTINGS);
@@ -68,9 +73,17 @@ public class CodecService extends AbstractIndexComponent {
         this.mapperService = mapperService;
         MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
         if (mapperService == null) {
-            codecs.put(DEFAULT_CODEC, Codec.getDefault());
+            codecs.put(DEFAULT_CODEC, new Lucene50Codec());
+            codecs.put(BEST_COMPRESSION_CODEC, new Lucene50Codec(Mode.BEST_COMPRESSION));
         } else {
-            codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(mapperService,
+            codecs.put(DEFAULT_CODEC,
+                    new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED,
+                    mapperService,
+                    postingsFormatService.get(PostingsFormatService.DEFAULT_FORMAT).get(),
+                    docValuesFormatService.get(DocValuesFormatService.DEFAULT_FORMAT).get(), logger));
+            codecs.put(BEST_COMPRESSION_CODEC,
+                    new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION,
+                    mapperService,
             postingsFormatService.get(PostingsFormatService.DEFAULT_FORMAT).get(),
             docValuesFormatService.get(DocValuesFormatService.DEFAULT_FORMAT).get(), logger));
         }
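This wiring is the whole of the "named codec" approach argued for in the commit message: each supported configuration is registered once under a fixed name, and lookup stays a map get. A stripped-down sketch of the pattern, with class and method names that are illustrative rather than the actual CodecService API:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.codecs.Codec;
    import org.apache.lucene.codecs.lucene50.Lucene50Codec;
    import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;

    // Illustrative named-codec registry: every legal value of index.codec
    // maps to a pre-built Codec instance, so there is nothing else to plumb.
    class NamedCodecRegistry {
        private final Map<String, Codec> codecs = new HashMap<>();

        NamedCodecRegistry() {
            codecs.put("default", new Lucene50Codec(Mode.BEST_SPEED));
            codecs.put("best_compression", new Lucene50Codec(Mode.BEST_COMPRESSION));
        }

        Codec codec(String name) {
            Codec codec = codecs.get(name);
            if (codec == null) {
                throw new IllegalArgumentException("failed to find codec [" + name + "]");
            }
            return codec;
        }
    }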
PerFieldMappingPostingFormatCodec.java
@@ -23,6 +23,7 @@ import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50Codec;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
@@ -49,7 +50,8 @@ public class PerFieldMappingPostingFormatCodec extends Lucene50Codec {
         assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMappingPostingFormatCodec.class) : "PerFieldMappingPostingFormatCodec must subclass the latest lucene codec: " + Lucene.LATEST_CODEC;
     }
 
-    public PerFieldMappingPostingFormatCodec(MapperService mapperService, PostingsFormat defaultPostingFormat, DocValuesFormat defaultDocValuesFormat, ESLogger logger) {
+    public PerFieldMappingPostingFormatCodec(Lucene50StoredFieldsFormat.Mode compressionMode, MapperService mapperService, PostingsFormat defaultPostingFormat, DocValuesFormat defaultDocValuesFormat, ESLogger logger) {
+        super(compressionMode);
         this.mapperService = mapperService;
         this.logger = logger;
         this.defaultPostingFormat = defaultPostingFormat;
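The constructor change threads the stored-fields mode through to Lucene50Codec's constructor, while the subclass keeps choosing postings formats per field. A simplified sketch of that shape; the class name and the field-to-format logic are invented for illustration:

    import org.apache.lucene.codecs.PostingsFormat;
    import org.apache.lucene.codecs.lucene50.Lucene50Codec;
    import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;

    // A codec that fixes the stored-fields compression mode at construction
    // time but still selects the postings format per field, mirroring how
    // PerFieldMappingPostingFormatCodec is wired in this commit.
    class ModeAwarePerFieldCodec extends Lucene50Codec {
        private final PostingsFormat defaultFormat;

        ModeAwarePerFieldCodec(Mode mode, PostingsFormat defaultFormat) {
            super(mode); // BEST_SPEED (LZ4) or BEST_COMPRESSION (deflate)
            this.defaultFormat = defaultFormat;
        }

        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            // The real codec consults the field mappings; this is a stand-in.
            return defaultFormat;
        }
    }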
InternalEngine.java
@@ -20,6 +20,7 @@
 package org.elasticsearch.index.engine.internal;
 
 import com.google.common.collect.Lists;
+
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.search.*;
@@ -69,6 +70,7 @@ import org.elasticsearch.threadpool.ThreadPool;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.lang.reflect.Method;
 import java.util.*;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -1054,10 +1056,13 @@ public class InternalEngine implements Engine {
             }
         }
 
+        // TODO: can we please remove this method?!
         private void waitForMerges(boolean flushAfter) {
             try {
-                currentIndexWriter().waitForMerges();
-            } catch (IOException e) {
+                Method method = IndexWriter.class.getDeclaredMethod("waitForMerges");
+                method.setAccessible(true);
+                method.invoke(currentIndexWriter());
+            } catch (ReflectiveOperationException e) {
                 throw new OptimizeFailedEngineException(shardId, e);
             }
             if (flushAfter) {
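The reflection here suggests that IndexWriter#waitForMerges() is not accessible in this Lucene snapshot, hence the TODO. In isolation the pattern looks like the sketch below; the wrapper class and exception choice are invented for illustration:

    import java.lang.reflect.Method;

    import org.apache.lucene.index.IndexWriter;

    public class WaitForMergesHack {
        // Invokes the non-public IndexWriter#waitForMerges() reflectively.
        // ReflectiveOperationException covers NoSuchMethodException,
        // IllegalAccessException and InvocationTargetException in one catch.
        public static void waitForMerges(IndexWriter writer) {
            try {
                Method method = IndexWriter.class.getDeclaredMethod("waitForMerges");
                method.setAccessible(true);
                method.invoke(writer);
            } catch (ReflectiveOperationException e) {
                throw new IllegalStateException("failed to wait for merges", e);
            }
        }
    }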
CodecTests.java
@@ -33,7 +33,15 @@ import org.apache.lucene.codecs.lucene46.Lucene46Codec;
 import org.apache.lucene.codecs.lucene49.Lucene49Codec;
 import org.apache.lucene.codecs.lucene50.Lucene50Codec;
 import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
 import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.store.Directory;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
@@ -173,6 +181,34 @@ public class CodecTests extends ElasticsearchSingleNodeLuceneTestCase {
         assertThat(documentMapper.rootMapper(VersionFieldMapper.class).docValuesFormatProvider(), instanceOf(PreBuiltDocValuesFormatProvider.class));
         assertThat(documentMapper.rootMapper(VersionFieldMapper.class).docValuesFormatProvider().get(), instanceOf(Lucene410DocValuesFormat.class));
     }
 
+    public void testDefault() throws Exception {
+        Codec codec = createCodecService().codec("default");
+        assertCompressionEquals(Mode.BEST_SPEED, codec);
+    }
+
+    public void testBestCompression() throws Exception {
+        Codec codec = createCodecService().codec("best_compression");
+        assertCompressionEquals(Mode.BEST_COMPRESSION, codec);
+    }
+
+    // write some docs with it, inspect .si to see this was the used compression
+    private void assertCompressionEquals(Mode expected, Codec actual) throws Exception {
+        Directory dir = newDirectory();
+        IndexWriterConfig iwc = newIndexWriterConfig(null);
+        iwc.setCodec(actual);
+        IndexWriter iw = new IndexWriter(dir, iwc);
+        iw.addDocument(new Document());
+        iw.commit();
+        iw.close();
+        DirectoryReader ir = DirectoryReader.open(dir);
+        SegmentReader sr = (SegmentReader) ir.leaves().get(0).reader();
+        String v = sr.getSegmentInfo().info.getAttribute(Lucene50StoredFieldsFormat.MODE_KEY);
+        assertNotNull(v);
+        assertEquals(expected, Mode.valueOf(v));
+        ir.close();
+        dir.close();
+    }
+
     private static CodecService createCodecService() {
         return createCodecService(ImmutableSettings.Builder.EMPTY_SETTINGS);
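The assertions work because Lucene50StoredFieldsFormat records the mode it was configured with in the segment info attributes under MODE_KEY, so reopening a segment reveals which compression was actually used to write it.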
StoreTest.java
@@ -187,6 +187,7 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
         IOUtils.close(verifyingOutput, dir);
     }
 
+    // TODO: remove this, its too fragile. just use a static old index instead.
     private static final class OldSIMockingCodec extends FilterCodec {
 
         protected OldSIMockingCodec() {
@@ -232,6 +233,7 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
                 }
             }
             output.writeStringSet(files);
+            output.writeStringStringMap(si.getAttributes());
             CodecUtil.writeFooter(output);
             success = true;
         } finally {
@@ -245,6 +247,7 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
         }
     }
 
+    // IF THIS TEST FAILS ON UPGRADE GO LOOK AT THE OldSIMockingCodec!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     @Test
     public void testWriteLegacyChecksums() throws IOException {
         final ShardId shardId = new ShardId(new Index("index"), 1);
ElasticsearchIntegrationTest.java
@@ -315,6 +315,8 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
         randomSettingsBuilder.put(SETTING_NUMBER_OF_SHARDS, numberOfShards())
                 .put(SETTING_NUMBER_OF_REPLICAS, numberOfReplicas());
 
+        randomSettingsBuilder.put("index.codec", randomFrom("default", "best_compression"));
+
         XContentBuilder mappings = null;
         if (frequently() && randomDynamicTemplates()) {
             mappings = XContentFactory.jsonBuilder().startObject().startObject("_default_");
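With index.codec randomized here, the integration test suite exercises both stored-fields compression modes over time rather than only the default.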