Disable bloom filters.

Make the "es090" postings format read-only, so that it only supports reading old segments. A test-only subclass adds write capability for testing.

Closes #8571
This commit is contained in:
Robert Muir 2014-11-20 21:00:29 -05:00
parent 3e1b7c7a34
commit 9ef69f9f36
17 changed files with 99 additions and 360 deletions

View File

@ -57,10 +57,6 @@ settings API:
`index.index_concurrency`:: `index.index_concurrency`::
Defaults to `8`. Defaults to `8`.
`index.codec.bloom.load`::
Whether to load the bloom filter. Defaults to `false`.
See <<codec-bloom-load>>.
`index.fail_on_merge_failure`:: `index.fail_on_merge_failure`::
Default to `true`. Default to `true`.
@ -227,35 +223,3 @@ curl -XPUT 'localhost:9200/myindex/_settings' -d '{
curl -XPOST 'localhost:9200/myindex/_open' curl -XPOST 'localhost:9200/myindex/_open'
-------------------------------------------------- --------------------------------------------------
[float]
[[codec-bloom-load]]
=== Bloom filters
Up to version 1.3, Elasticsearch used to generate bloom filters for the `_uid`
field at indexing time and to load them at search time in order to speed up
primary-key lookups by saving disk seeks.
As of 1.4, bloom filters are still generated at indexing time, but they are
no longer loaded at search time by default: they consume RAM in proportion to
the number of unique terms, which can quickly add up for certain use cases,
and separate performance improvements have made the performance gains with
bloom filters very small.
[TIP]
==================================================
You can enable loading of the bloom filter at search time on a
per-index basis by updating the index settings:
[source,js]
--------------------------------------------------
PUT /old_index/_settings?index.codec.bloom.load=true
--------------------------------------------------
This setting, which defaults to `false`, can be updated on a live index. Note,
however, that changing the value will cause the index to be reopened, which
will invalidate any existing caches.
==================================================

View File

@ -44,16 +44,11 @@ import org.elasticsearch.index.settings.IndexSettings;
*/ */
public class CodecService extends AbstractIndexComponent { public class CodecService extends AbstractIndexComponent {
public static final String INDEX_CODEC_BLOOM_LOAD = "index.codec.bloom.load";
public static final boolean INDEX_CODEC_BLOOM_LOAD_DEFAULT = false;
private final PostingsFormatService postingsFormatService; private final PostingsFormatService postingsFormatService;
private final DocValuesFormatService docValuesFormatService; private final DocValuesFormatService docValuesFormatService;
private final MapperService mapperService; private final MapperService mapperService;
private final ImmutableMap<String, Codec> codecs; private final ImmutableMap<String, Codec> codecs;
private volatile boolean loadBloomFilter = true;
public final static String DEFAULT_CODEC = "default"; public final static String DEFAULT_CODEC = "default";
public CodecService(Index index) { public CodecService(Index index) {
@ -83,7 +78,6 @@ public class CodecService extends AbstractIndexComponent {
codecs.put(codec, Codec.forName(codec)); codecs.put(codec, Codec.forName(codec));
} }
this.codecs = codecs.immutableMap(); this.codecs = codecs.immutableMap();
this.loadBloomFilter = indexSettings.getAsBoolean(INDEX_CODEC_BLOOM_LOAD, INDEX_CODEC_BLOOM_LOAD_DEFAULT);
} }
public PostingsFormatService postingsFormatService() { public PostingsFormatService postingsFormatService() {
@ -105,12 +99,4 @@ public class CodecService extends AbstractIndexComponent {
} }
return codec; return codec;
} }
public boolean isLoadBloomFilter() {
return this.loadBloomFilter;
}
public void setLoadBloomFilter(boolean loadBloomFilter) {
this.loadBloomFilter = loadBloomFilter;
}
} }

View File

@ -24,8 +24,6 @@ import org.apache.lucene.index.*;
import org.apache.lucene.store.*; import org.apache.lucene.store.*;
import org.apache.lucene.util.*; import org.apache.lucene.util.*;
import org.elasticsearch.common.util.BloomFilter; import org.elasticsearch.common.util.BloomFilter;
import org.elasticsearch.index.store.DirectoryUtils;
import org.elasticsearch.index.store.Store;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.*;
@ -42,7 +40,9 @@ import java.util.Map.Entry;
* This is a special bloom filter version, based on {@link org.elasticsearch.common.util.BloomFilter} and inspired * This is a special bloom filter version, based on {@link org.elasticsearch.common.util.BloomFilter} and inspired
* by Lucene {@link org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat}. * by Lucene {@link org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat}.
* </p> * </p>
* @deprecated only for reading old segments
*/ */
@Deprecated
public final class BloomFilterPostingsFormat extends PostingsFormat { public final class BloomFilterPostingsFormat extends PostingsFormat {
public static final String BLOOM_CODEC_NAME = "XBloomFilter"; // the Lucene one is named BloomFilter public static final String BLOOM_CODEC_NAME = "XBloomFilter"; // the Lucene one is named BloomFilter
@ -160,30 +160,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
// // Load the hash function used in the BloomFilter // // Load the hash function used in the BloomFilter
// hashFunction = HashFunction.forName(bloomIn.readString()); // hashFunction = HashFunction.forName(bloomIn.readString());
// Load the delegate postings format // Load the delegate postings format
final String delegatePostings = bloomIn final String delegatePostings = bloomIn.readString();
.readString();
int numBlooms = bloomIn.readInt();
boolean load = false;
Store.StoreDirectory storeDir = DirectoryUtils.getStoreDirectory(state.directory);
if (storeDir != null && storeDir.codecService() != null) {
load = storeDir.codecService().isLoadBloomFilter();
}
if (load) {
for (int i = 0; i < numBlooms; i++) {
int fieldNum = bloomIn.readInt();
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
LazyBloomLoader loader = new LazyBloomLoader(bloomIn.getFilePointer(), dataInput);
bloomsByFieldName.put(fieldInfo.name, loader);
BloomFilter.skipBloom(bloomIn);
}
if (version >= BLOOM_CODEC_VERSION_CHECKSUM) {
CodecUtil.checkFooter(bloomIn);
} else {
CodecUtil.checkEOF(bloomIn);
}
}
this.delegateFieldsProducer = PostingsFormat.forName(delegatePostings) this.delegateFieldsProducer = PostingsFormat.forName(delegatePostings)
.fieldsProducer(state); .fieldsProducer(state);
this.data = dataInput; this.data = dataInput;
@ -383,8 +360,9 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
} }
// TODO: would be great to move this out to test code, but the interaction between es090 and bloom is complex
final class BloomFilteredFieldsConsumer extends FieldsConsumer { // at least it is not accessible via SPI
public final class BloomFilteredFieldsConsumer extends FieldsConsumer {
private FieldsConsumer delegateFieldsConsumer; private FieldsConsumer delegateFieldsConsumer;
private Map<FieldInfo, BloomFilter> bloomFilters = new HashMap<>(); private Map<FieldInfo, BloomFilter> bloomFilters = new HashMap<>();
private SegmentWriteState state; private SegmentWriteState state;
@ -399,7 +377,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
} }
// for internal use only // for internal use only
FieldsConsumer getDelegate() { public FieldsConsumer getDelegate() {
return delegateFieldsConsumer; return delegateFieldsConsumer;
} }

View File

@ -30,7 +30,9 @@ import org.elasticsearch.index.settings.IndexSettings;
import java.util.Map; import java.util.Map;
/** /**
* @deprecated only for reading old segments
*/ */
@Deprecated
public class BloomFilterPostingsFormatProvider extends AbstractPostingsFormatProvider { public class BloomFilterPostingsFormatProvider extends AbstractPostingsFormatProvider {
private final PostingsFormatProvider delegate; private final PostingsFormatProvider delegate;

View File

@ -38,14 +38,17 @@ import java.io.IOException;
import java.util.Iterator; import java.util.Iterator;
/** /**
* This is the default postings format for Elasticsearch that special cases * This is the old default postings format for Elasticsearch that special cases
* the <tt>_uid</tt> field to use a bloom filter while all other fields * the <tt>_uid</tt> field to use a bloom filter while all other fields
* will use a {@link Lucene50PostingsFormat}. This format will reuse the underlying * will use a {@link Lucene50PostingsFormat}. This format will reuse the underlying
* {@link Lucene50PostingsFormat} and its files also for the <tt>_uid</tt> saving up to * {@link Lucene50PostingsFormat} and its files also for the <tt>_uid</tt> saving up to
* 5 files per segment in the default case. * 5 files per segment in the default case.
* <p>
* @deprecated only for reading old segments
*/ */
public final class Elasticsearch090PostingsFormat extends PostingsFormat { @Deprecated
private final BloomFilterPostingsFormat bloomPostings; public class Elasticsearch090PostingsFormat extends PostingsFormat {
protected final BloomFilterPostingsFormat bloomPostings;
public Elasticsearch090PostingsFormat() { public Elasticsearch090PostingsFormat() {
super("es090"); super("es090");
@ -57,7 +60,7 @@ public final class Elasticsearch090PostingsFormat extends PostingsFormat {
public PostingsFormat getDefaultWrapped() { public PostingsFormat getDefaultWrapped() {
return bloomPostings.getDelegate(); return bloomPostings.getDelegate();
} }
private static final Predicate<String> UID_FIELD_FILTER = new Predicate<String>() { protected static final Predicate<String> UID_FIELD_FILTER = new Predicate<String>() {
@Override @Override
public boolean apply(String s) { public boolean apply(String s) {
@ -67,34 +70,7 @@ public final class Elasticsearch090PostingsFormat extends PostingsFormat {
@Override @Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
final BloomFilteredFieldsConsumer fieldsConsumer = bloomPostings.fieldsConsumer(state); throw new UnsupportedOperationException("this codec can only be used for reading");
return new FieldsConsumer() {
@Override
public void write(Fields fields) throws IOException {
Fields maskedFields = new FilterLeafReader.FilterFields(fields) {
@Override
public Iterator<String> iterator() {
return Iterators.filter(this.in.iterator(), Predicates.not(UID_FIELD_FILTER));
}
};
fieldsConsumer.getDelegate().write(maskedFields);
maskedFields = new FilterLeafReader.FilterFields(fields) {
@Override
public Iterator<String> iterator() {
return Iterators.singletonIterator(UidFieldMapper.NAME);
}
};
// only go through bloom for the UID field
fieldsConsumer.write(maskedFields);
}
@Override
public void close() throws IOException {
fieldsConsumer.close();
}
};
} }
@Override @Override

View File

@ -30,10 +30,7 @@ import org.elasticsearch.common.util.BloomFilter;
* This class represents the set of Elasticsearch "built-in" * This class represents the set of Elasticsearch "built-in"
* {@link PostingsFormatProvider.Factory postings format factories} * {@link PostingsFormatProvider.Factory postings format factories}
* <ul> * <ul>
* <li><b>bloom_default</b>: a postings format that uses a bloom filter to * <li><b>default</b>: the default Lucene postings format offering best
* improve term lookup performance. This is useful for primarily keys or fields
* that are used as a delete key</li>
* <li><b>default</b>: the default Elasticsearch postings format offering best
* general purpose performance. This format is used if no postings format is * general purpose performance. This format is used if no postings format is
* specified in the field mapping.</li> * specified in the field mapping.</li>
* <li><b>***</b>: other formats from Lucene core (e.g. Lucene41 as of Lucene 4.10) * <li><b>***</b>: other formats from Lucene core (e.g. Lucene41 as of Lucene 4.10)
@ -51,12 +48,10 @@ public class PostingFormats {
for (String luceneName : PostingsFormat.availablePostingsFormats()) { for (String luceneName : PostingsFormat.availablePostingsFormats()) {
builtInPostingFormatsX.put(luceneName, new PreBuiltPostingsFormatProvider.Factory(PostingsFormat.forName(luceneName))); builtInPostingFormatsX.put(luceneName, new PreBuiltPostingsFormatProvider.Factory(PostingsFormat.forName(luceneName)));
} }
final PostingsFormat defaultFormat = new Elasticsearch090PostingsFormat(); final PostingsFormat defaultFormat = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
builtInPostingFormatsX.put(PostingsFormatService.DEFAULT_FORMAT, builtInPostingFormatsX.put(PostingsFormatService.DEFAULT_FORMAT,
new PreBuiltPostingsFormatProvider.Factory(PostingsFormatService.DEFAULT_FORMAT, defaultFormat)); new PreBuiltPostingsFormatProvider.Factory(PostingsFormatService.DEFAULT_FORMAT, defaultFormat));
builtInPostingFormatsX.put("bloom_default", new PreBuiltPostingsFormatProvider.Factory("bloom_default", wrapInBloom(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT))));
builtInPostingFormats = builtInPostingFormatsX.immutableMap(); builtInPostingFormats = builtInPostingFormatsX.immutableMap();
} }

View File

@ -1522,12 +1522,10 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
int indexConcurrency = settings.getAsInt(INDEX_INDEX_CONCURRENCY, InternalEngine.this.indexConcurrency); int indexConcurrency = settings.getAsInt(INDEX_INDEX_CONCURRENCY, InternalEngine.this.indexConcurrency);
boolean failOnMergeFailure = settings.getAsBoolean(INDEX_FAIL_ON_MERGE_FAILURE, InternalEngine.this.failOnMergeFailure); boolean failOnMergeFailure = settings.getAsBoolean(INDEX_FAIL_ON_MERGE_FAILURE, InternalEngine.this.failOnMergeFailure);
String codecName = settings.get(INDEX_CODEC, InternalEngine.this.codecName); String codecName = settings.get(INDEX_CODEC, InternalEngine.this.codecName);
final boolean codecBloomLoad = settings.getAsBoolean(CodecService.INDEX_CODEC_BLOOM_LOAD, codecService.isLoadBloomFilter());
boolean requiresFlushing = false; boolean requiresFlushing = false;
if (indexConcurrency != InternalEngine.this.indexConcurrency || if (indexConcurrency != InternalEngine.this.indexConcurrency ||
!codecName.equals(InternalEngine.this.codecName) || !codecName.equals(InternalEngine.this.codecName) ||
failOnMergeFailure != InternalEngine.this.failOnMergeFailure || failOnMergeFailure != InternalEngine.this.failOnMergeFailure) {
codecBloomLoad != codecService.isLoadBloomFilter()) {
try (InternalLock _ = readLock.acquire()) { try (InternalLock _ = readLock.acquire()) {
if (indexConcurrency != InternalEngine.this.indexConcurrency) { if (indexConcurrency != InternalEngine.this.indexConcurrency) {
logger.info("updating index.index_concurrency from [{}] to [{}]", InternalEngine.this.indexConcurrency, indexConcurrency); logger.info("updating index.index_concurrency from [{}] to [{}]", InternalEngine.this.indexConcurrency, indexConcurrency);
@ -1545,12 +1543,6 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
logger.info("updating {} from [{}] to [{}]", InternalEngine.INDEX_FAIL_ON_MERGE_FAILURE, InternalEngine.this.failOnMergeFailure, failOnMergeFailure); logger.info("updating {} from [{}] to [{}]", InternalEngine.INDEX_FAIL_ON_MERGE_FAILURE, InternalEngine.this.failOnMergeFailure, failOnMergeFailure);
InternalEngine.this.failOnMergeFailure = failOnMergeFailure; InternalEngine.this.failOnMergeFailure = failOnMergeFailure;
} }
if (codecBloomLoad != codecService.isLoadBloomFilter()) {
logger.info("updating {} from [{}] to [{}]", CodecService.INDEX_CODEC_BLOOM_LOAD, codecService.isLoadBloomFilter(), codecBloomLoad);
codecService.setLoadBloomFilter(codecBloomLoad);
// we need to flush in this case, to load/unload the bloom filters
requiresFlushing = true;
}
} }
if (requiresFlushing) { if (requiresFlushing) {
flush(new Flush().type(Flush.Type.NEW_WRITER)); flush(new Flush().type(Flush.Type.NEW_WRITER));

View File

@ -85,7 +85,6 @@ public class IndexDynamicSettingsModule extends AbstractModule {
indexDynamicSettings.addDynamicSetting(LogDocMergePolicyProvider.INDEX_COMPOUND_FORMAT); indexDynamicSettings.addDynamicSetting(LogDocMergePolicyProvider.INDEX_COMPOUND_FORMAT);
indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_INDEX_CONCURRENCY, Validator.NON_NEGATIVE_INTEGER); indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_INDEX_CONCURRENCY, Validator.NON_NEGATIVE_INTEGER);
indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_COMPOUND_ON_FLUSH, Validator.BOOLEAN); indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_COMPOUND_ON_FLUSH, Validator.BOOLEAN);
indexDynamicSettings.addDynamicSetting(CodecService.INDEX_CODEC_BLOOM_LOAD, Validator.BOOLEAN);
indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_GC_DELETES, Validator.TIME); indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_GC_DELETES, Validator.TIME);
indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_CODEC); indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_CODEC);
indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_FAIL_ON_MERGE_FAILURE); indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_FAIL_ON_MERGE_FAILURE);

View File

@ -550,12 +550,6 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
return Store.this.shardId(); return Store.this.shardId();
} }
@Nullable
public CodecService codecService() {
ensureOpen();
return Store.this.codecService;
}
@Override @Override
public void close() throws IOException { public void close() throws IOException {
assert false : "Nobody should close this directory except of the Store itself"; assert false : "Nobody should close this directory except of the Store itself";

View File

@ -1,3 +1,2 @@
org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat
org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat
org.elasticsearch.search.suggest.completion.Completion090PostingsFormat org.elasticsearch.search.suggest.completion.Completion090PostingsFormat

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.codec; package org.elasticsearch.index.codec;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat; import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec; import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene41.Lucene41Codec; import org.apache.lucene.codecs.lucene41.Lucene41Codec;
@ -33,6 +34,7 @@ import org.apache.lucene.codecs.lucene49.Lucene49Codec;
import org.apache.lucene.codecs.lucene50.Lucene50Codec; import org.apache.lucene.codecs.lucene50.Lucene50Codec;
import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat; import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentFactory;
@ -80,25 +82,16 @@ public class CodecTests extends ElasticsearchSingleNodeLuceneTestCase {
public void testResolveDefaultPostingFormats() throws Exception { public void testResolveDefaultPostingFormats() throws Exception {
PostingsFormatService postingsFormatService = createCodecService().postingsFormatService(); PostingsFormatService postingsFormatService = createCodecService().postingsFormatService();
assertThat(postingsFormatService.get("default"), instanceOf(PreBuiltPostingsFormatProvider.class)); assertThat(postingsFormatService.get("default"), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(postingsFormatService.get("default").get(), instanceOf(Elasticsearch090PostingsFormat.class)); PostingsFormat luceneDefault = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
assertThat(postingsFormatService.get("default").get(), instanceOf(luceneDefault.getClass()));
// Should fail when upgrading Lucene with codec changes // Should fail when upgrading Lucene with codec changes
assertThat(((Elasticsearch090PostingsFormat)postingsFormatService.get("default").get()).getDefaultWrapped(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField("").getClass()));
assertThat(postingsFormatService.get("Lucene41"), instanceOf(PreBuiltPostingsFormatProvider.class)); assertThat(postingsFormatService.get("Lucene41"), instanceOf(PreBuiltPostingsFormatProvider.class));
// Should fail when upgrading Lucene with codec changes // Should fail when upgrading Lucene with codec changes
assertThat(postingsFormatService.get("Lucene50").get(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField(null).getClass())); assertThat(postingsFormatService.get("Lucene50").get(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField(null).getClass()));
assertThat(postingsFormatService.get("bloom_default"), instanceOf(PreBuiltPostingsFormatProvider.class));
if (PostingFormats.luceneBloomFilter) {
assertThat(postingsFormatService.get("bloom_default").get(), instanceOf(BloomFilteringPostingsFormat.class));
} else {
assertThat(postingsFormatService.get("bloom_default").get(), instanceOf(BloomFilterPostingsFormat.class));
}
assertThat(postingsFormatService.get("BloomFilter"), instanceOf(PreBuiltPostingsFormatProvider.class)); assertThat(postingsFormatService.get("BloomFilter"), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(postingsFormatService.get("BloomFilter").get(), instanceOf(BloomFilteringPostingsFormat.class)); assertThat(postingsFormatService.get("BloomFilter").get(), instanceOf(BloomFilteringPostingsFormat.class));
assertThat(postingsFormatService.get("XBloomFilter"), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(postingsFormatService.get("XBloomFilter").get(), instanceOf(BloomFilterPostingsFormat.class));
} }
@Test @Test
@ -128,7 +121,8 @@ public class CodecTests extends ElasticsearchSingleNodeLuceneTestCase {
CodecService codecService = createCodecService(indexSettings); CodecService codecService = createCodecService(indexSettings);
DocumentMapper documentMapper = codecService.mapperService().documentMapperParser().parse(mapping); DocumentMapper documentMapper = codecService.mapperService().documentMapperParser().parse(mapping);
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider(), instanceOf(PreBuiltPostingsFormatProvider.class)); assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider(), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(Elasticsearch090PostingsFormat.class)); PostingsFormat luceneDefault = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(luceneDefault.getClass()));
assertThat(documentMapper.mappers().name("field2").mapper().postingsFormatProvider(), instanceOf(DefaultPostingsFormatProvider.class)); assertThat(documentMapper.mappers().name("field2").mapper().postingsFormatProvider(), instanceOf(DefaultPostingsFormatProvider.class));
DefaultPostingsFormatProvider provider = (DefaultPostingsFormatProvider) documentMapper.mappers().name("field2").mapper().postingsFormatProvider(); DefaultPostingsFormatProvider provider = (DefaultPostingsFormatProvider) documentMapper.mappers().name("field2").mapper().postingsFormatProvider();

View File

@ -1,122 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.codec.postingformat;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat;
import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.hamcrest.Matchers.*;
/**
 * Simple smoke test for {@link org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat}.
 * <p>
 * Each test indexes documents through a codec that routes every field to
 * {@link Elasticsearch090PostingsFormat}, opens a near-real-time reader on the writer and
 * checks that the {@link Terms} read back are not
 * {@link BloomFilterPostingsFormat.BloomFilteredTerms}.
 */
public class DefaultPostingsFormatTests extends ElasticsearchTestCase {

    /** Codec that hands out a fresh {@link Elasticsearch090PostingsFormat} for every field. */
    private static final class TestCodec extends Lucene50Codec {

        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return new Elasticsearch090PostingsFormat();
        }
    }

    /**
     * Indexes one document containing both a regular field and the <tt>_uid</tt> field and
     * verifies that neither field's terms are bloom-filtered when read back.
     *
     * @throws IOException if indexing or reading fails
     */
    @Test
    public void testUseDefault() throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
        config.setCodec(new TestCodec());
        // try-with-resources so that directory, writer and reader are released even when an
        // assertion fails; resources are closed in reverse order (reader, writer, directory)
        try (Directory d = new RAMDirectory();
             IndexWriter writer = new IndexWriter(d, config)) {
            writer.addDocument(Arrays.asList(
                    new TextField("foo", "bar", Store.YES),
                    new TextField(UidFieldMapper.NAME, "1234", Store.YES)));
            writer.commit();
            try (DirectoryReader reader = DirectoryReader.open(writer, false)) {
                List<LeafReaderContext> leaves = reader.leaves();
                assertThat(leaves.size(), equalTo(1));
                LeafReader ar = leaves.get(0).reader();
                Terms terms = ar.terms("foo");
                Terms uidTerms = ar.terms(UidFieldMapper.NAME);

                assertThat(terms.size(), equalTo(1L));
                assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
                assertThat(uidTerms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
            }
        }
    }

    /**
     * Indexes 100 documents that have no <tt>_uid</tt> field, force-merges them into a single
     * segment and verifies that the terms of both fields are not bloom-filtered and that the
     * terms enum of <tt>foo</tt> yields both expected terms.
     *
     * @throws IOException if indexing or reading fails
     */
    @Test
    public void testNoUIDField() throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
        config.setCodec(new TestCodec());
        // try-with-resources so that directory, writer and reader are released even when an
        // assertion fails; resources are closed in reverse order (reader, writer, directory)
        try (Directory d = new RAMDirectory();
             IndexWriter writer = new IndexWriter(d, config)) {
            for (int i = 0; i < 100; i++) {
                writer.addDocument(Arrays.asList(
                        new TextField("foo", "foo bar foo bar", Store.YES),
                        new TextField("some_other_field", "1234", Store.YES)));
            }
            writer.forceMerge(1, true);
            writer.commit();

            try (DirectoryReader reader = DirectoryReader.open(writer, false)) {
                List<LeafReaderContext> leaves = reader.leaves();
                assertThat(leaves.size(), equalTo(1));
                LeafReader ar = leaves.get(0).reader();
                Terms terms = ar.terms("foo");
                Terms some_other_field = ar.terms("some_other_field");

                assertThat(terms.size(), equalTo(2L));
                assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
                assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));

                // every expected term must be produced by the enum; whatever is left over was missed
                Set<String> expected = new HashSet<>();
                expected.add("foo");
                expected.add("bar");
                TermsEnum iterator = terms.iterator(null);
                while (iterator.next() != null) {
                    expected.remove(iterator.term().utf8ToString());
                }
                assertThat(expected.size(), equalTo(0));
            }
        }
    }
}

View File

@ -0,0 +1,69 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.codec.postingformat;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterators;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.SegmentWriteState;
import org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat.BloomFilteredFieldsConsumer;
import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import java.io.IOException;
import java.util.Iterator;
/**
 * Read-write version of {@link Elasticsearch090PostingsFormat} with blooms, for testing.
 * <p>
 * It overrides {@link #fieldsConsumer(SegmentWriteState)} to return a consumer that splits
 * the incoming fields in two: fields rejected by {@code UID_FIELD_FILTER} are written
 * through the bloom consumer's delegate, and a view exposing only
 * {@link UidFieldMapper#NAME} is written through the bloom consumer itself.
 */
public class Elasticsearch090RWPostingsFormat extends Elasticsearch090PostingsFormat {

    @Override
    public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
        final BloomFilteredFieldsConsumer bloomConsumer = bloomPostings.fieldsConsumer(state);
        return new FieldsConsumer() {

            @Override
            public void write(Fields fields) throws IOException {
                // everything that is not matched by the UID filter bypasses the bloom layer
                Fields withoutUid = excludingUid(fields);
                bloomConsumer.getDelegate().write(withoutUid);

                // only go through bloom for the UID field
                Fields uidOnly = onlyUid(fields);
                bloomConsumer.write(uidOnly);
            }

            @Override
            public void close() throws IOException {
                bloomConsumer.close();
            }
        };
    }

    /** Returns a view of {@code fields} whose iterator skips names accepted by {@code UID_FIELD_FILTER}. */
    private static Fields excludingUid(Fields fields) {
        return new FilterLeafReader.FilterFields(fields) {
            @Override
            public Iterator<String> iterator() {
                return Iterators.filter(this.in.iterator(), Predicates.not(UID_FIELD_FILTER));
            }
        };
    }

    /** Returns a view of {@code fields} whose iterator yields only {@link UidFieldMapper#NAME}. */
    private static Fields onlyUid(Fields fields) {
        return new FilterLeafReader.FilterFields(fields) {
            @Override
            public Iterator<String> iterator() {
                return Iterators.singletonIterator(UidFieldMapper.NAME);
            }
        };
    }
}

View File

@ -47,9 +47,7 @@ public class ElasticsearchPostingsFormatTest extends BasePostingsFormatTestCase
@Override @Override
protected Codec getCodec() { protected Codec getCodec() {
return random().nextBoolean() ? return TestUtil.alwaysPostingsFormat(new Elasticsearch090RWPostingsFormat());
TestUtil.alwaysPostingsFormat(new Elasticsearch090PostingsFormat())
: TestUtil.alwaysPostingsFormat(new BloomFilterPostingsFormat(PostingsFormat.forName("Lucene50"), BloomFilter.Factory.DEFAULT));
} }
} }

View File

@ -19,19 +19,12 @@
package org.elasticsearch.index.engine.internal; package org.elasticsearch.index.engine.internal;
import com.google.common.base.Predicate;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.elasticsearch.action.admin.indices.segments.IndexSegments; import org.elasticsearch.action.admin.indices.segments.IndexSegments;
import org.elasticsearch.action.admin.indices.segments.IndexShardSegments; import org.elasticsearch.action.admin.indices.segments.IndexShardSegments;
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentResponse; import org.elasticsearch.action.admin.indices.segments.IndicesSegmentResponse;
import org.elasticsearch.action.admin.indices.segments.ShardSegments; import org.elasticsearch.action.admin.indices.segments.ShardSegments;
import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.util.BloomFilter;
import org.elasticsearch.index.codec.CodecService;
import org.elasticsearch.index.engine.Segment; import org.elasticsearch.index.engine.Segment;
import org.elasticsearch.index.merge.policy.AbstractMergePolicyProvider;
import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.hamcrest.Matchers; import org.hamcrest.Matchers;
import org.junit.Test; import org.junit.Test;
@ -39,85 +32,9 @@ import org.junit.Test;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
public class InternalEngineIntegrationTest extends ElasticsearchIntegrationTest { public class InternalEngineIntegrationTest extends ElasticsearchIntegrationTest {
/**
 * Creates a single-shard, zero-replica index without setting {@code CodecService.INDEX_CODEC_BLOOM_LOAD}
 * in the create request, indexes one document, then flips the setting to {@code false} and back to
 * {@code true}, polling the reported segments memory after each change.
 *
 * NOTE(review): the outcome of each {@code awaitBusy} call is discarded, so presumably a poll that
 * never matches does not fail this test -- confirm whether that is intended.
 */
@Test
@Slow
public void testSettingLoadBloomFilterDefaultTrue() throws Exception {
    client().admin().indices().prepareCreate("test")
            .setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1))
            .get();
    client().prepareIndex("test", "foo").setSource("field", "foo").get();
    ensureGreen();
    refresh();

    // Baseline: segments memory as reported right after the first refresh.
    IndicesStatsResponse initialStats = client().admin().indices().prepareStats().setSegments(true).get();
    final long segmentsMemoryWithBloom = initialStats.getTotal().getSegments().getMemoryInBytes();
    logger.info("segments with bloom: {}", segmentsMemoryWithBloom);

    logger.info("updating the setting to unload bloom filters");
    client().admin().indices().prepareUpdateSettings("test")
            .setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, false))
            .get();
    logger.info("waiting for memory to match without blooms");
    awaitBusy(new Predicate<Object>() {
        @Override
        public boolean apply(Object ignored) {
            IndicesStatsResponse polled = client().admin().indices().prepareStats().setSegments(true).get();
            long segmentsMemoryWithoutBloom = polled.getTotal().getSegments().getMemoryInBytes();
            logger.info("trying segments without bloom: {}", segmentsMemoryWithoutBloom);
            // Expect the baseline minus the size of a default filter created for one entry.
            final long expected = segmentsMemoryWithBloom - BloomFilter.Factory.DEFAULT.createFilter(1).getSizeInBytes();
            return segmentsMemoryWithoutBloom == expected;
        }
    });

    logger.info("updating the setting to load bloom filters");
    client().admin().indices().prepareUpdateSettings("test")
            .setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, true))
            .get();
    logger.info("waiting for memory to match with blooms");
    awaitBusy(new Predicate<Object>() {
        @Override
        public boolean apply(Object ignored) {
            IndicesStatsResponse polled = client().admin().indices().prepareStats().setSegments(true).get();
            long newSegmentsMemoryWithBloom = polled.getTotal().getSegments().getMemoryInBytes();
            logger.info("trying segments with bloom: {}", newSegmentsMemoryWithBloom);
            // Memory should return to the baseline once the setting is back to true.
            return newSegmentsMemoryWithBloom == segmentsMemoryWithBloom;
        }
    });
}
/**
 * Creates a single-shard, zero-replica index with {@code CodecService.INDEX_CODEC_BLOOM_LOAD} set to
 * {@code false}, indexes one document, then flips the setting to {@code true} and back to
 * {@code false}, polling the reported segments memory after each change.
 *
 * NOTE(review): the outcome of each {@code awaitBusy} call is discarded, so presumably a poll that
 * never matches does not fail this test -- confirm whether that is intended.
 */
@Test
@Slow
public void testSettingLoadBloomFilterDefaultFalse() throws Exception {
    client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1).put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get();
    client().prepareIndex("test", "foo").setSource("field", "foo").get();
    ensureGreen();
    refresh();

    // Baseline: segments memory with the setting at false.
    IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
    final long segmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes();
    logger.info("segments without bloom: {}", segmentsMemoryWithoutBloom);

    logger.info("updating the setting to load bloom filters");
    client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, true)).get();
    logger.info("waiting for memory to match with blooms");
    awaitBusy(new Predicate<Object>() {
        public boolean apply(Object o) {
            IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
            long segmentsMemoryWithBloom = stats.getTotal().getSegments().getMemoryInBytes();
            // Log the freshly polled value; the baseline captured above never changes between polls.
            logger.info("trying segments with bloom: {}", segmentsMemoryWithBloom);
            // Expect the polled value to exceed the baseline by the size of a default filter for one entry.
            return segmentsMemoryWithoutBloom == (segmentsMemoryWithBloom - BloomFilter.Factory.DEFAULT.createFilter(1).getSizeInBytes());
        }
    });

    logger.info("updating the setting to unload bloom filters");
    client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get();
    logger.info("waiting for memory to match without blooms");
    awaitBusy(new Predicate<Object>() {
        public boolean apply(Object o) {
            IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
            long newSegmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes();
            logger.info("trying segments without bloom: {}", newSegmentsMemoryWithoutBloom);
            // Memory should return to the baseline once the setting is back to false.
            return newSegmentsMemoryWithoutBloom == segmentsMemoryWithoutBloom;
        }
    });
}
@Test @Test
public void testSetIndexCompoundOnFlush() { public void testSetIndexCompoundOnFlush() {
client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1)).get(); client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1)).get();

View File

@ -36,6 +36,7 @@ import org.apache.lucene.store.*;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LineFileDocs;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat; import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
@ -70,7 +71,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT); IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
LookupFactory load = currentProvider.load(input); LookupFactory load = currentProvider.load(input);
PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat()); PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT));
NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer()); NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer());
Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING), new CompletionSuggestionContext(null)); Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING), new CompletionSuggestionContext(null));
List<LookupResult> result = lookup.lookup("ge", false, 10); List<LookupResult> result = lookup.lookup("ge", false, 10);
@ -214,7 +215,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
iter = primaryIter; iter = primaryIter;
} }
reference.build(iter); reference.build(iter);
PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat()); PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT));
NamedAnalyzer namedAnalzyer = new NamedAnalyzer("foo", new StandardAnalyzer()); NamedAnalyzer namedAnalzyer = new NamedAnalyzer("foo", new StandardAnalyzer());
final CompletionFieldMapper mapper = new CompletionFieldMapper(new Names("foo"), namedAnalzyer, namedAnalzyer, provider, null, usePayloads, final CompletionFieldMapper mapper = new CompletionFieldMapper(new Names("foo"), namedAnalzyer, namedAnalzyer, provider, null, usePayloads,

View File

@ -466,9 +466,6 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
builder.put(FsTranslog.INDEX_TRANSLOG_FS_TYPE, RandomPicks.randomFrom(random, FsTranslogFile.Type.values()).name()); builder.put(FsTranslog.INDEX_TRANSLOG_FS_TYPE, RandomPicks.randomFrom(random, FsTranslogFile.Type.values()).name());
} }
// Randomly load or don't load bloom filters:
builder.put(CodecService.INDEX_CODEC_BLOOM_LOAD, random.nextBoolean());
if (random.nextBoolean()) { if (random.nextBoolean()) {
builder.put(IndicesQueryCache.INDEX_CACHE_QUERY_ENABLED, random.nextBoolean()); builder.put(IndicesQueryCache.INDEX_CACHE_QUERY_ENABLED, random.nextBoolean());
} }