diff --git a/docs/reference/indices/update-settings.asciidoc b/docs/reference/indices/update-settings.asciidoc index c5e16d77653..18f94d91cec 100644 --- a/docs/reference/indices/update-settings.asciidoc +++ b/docs/reference/indices/update-settings.asciidoc @@ -57,10 +57,6 @@ settings API: `index.index_concurrency`:: Defaults to `8`. -`index.codec.bloom.load`:: - Whether to load the bloom filter. Defaults to `false`. - See <>. - `index.fail_on_merge_failure`:: Default to `true`. @@ -227,35 +223,3 @@ curl -XPUT 'localhost:9200/myindex/_settings' -d '{ curl -XPOST 'localhost:9200/myindex/_open' -------------------------------------------------- - -[float] -[[codec-bloom-load]] -=== Bloom filters - -Up to version 1.3, Elasticsearch used to generate bloom filters for the `_uid` -field at indexing time and to load them at search time in order to speed-up -primary-key lookups by savings disk seeks. - -As of 1.4, bloom filters are still generated at indexing time, but they are -no longer loaded at search time by default: they consume RAM in proportion to -the number of unique terms, which can quickly add up for certain use cases, -and separate performance improvements have made the performance gains with -bloom filters very small. - -[TIP] -================================================== - -You can enable loading of the bloom filter at search time on a -per-index basis by updating the index settings: - -[source,js] --------------------------------------------------- -PUT /old_index/_settings?index.codec.bloom.load=true --------------------------------------------------- - -This setting, which defaults to `false`, can be updated on a live index. Note, -however, that changing the value will cause the index to be reopened, which -will invalidate any existing caches. - -================================================== - diff --git a/src/main/java/org/elasticsearch/index/codec/CodecService.java b/src/main/java/org/elasticsearch/index/codec/CodecService.java index 45c9055044e..907c1d0e2fd 100644 --- a/src/main/java/org/elasticsearch/index/codec/CodecService.java +++ b/src/main/java/org/elasticsearch/index/codec/CodecService.java @@ -44,16 +44,11 @@ import org.elasticsearch.index.settings.IndexSettings; */ public class CodecService extends AbstractIndexComponent { - public static final String INDEX_CODEC_BLOOM_LOAD = "index.codec.bloom.load"; - public static final boolean INDEX_CODEC_BLOOM_LOAD_DEFAULT = false; - private final PostingsFormatService postingsFormatService; private final DocValuesFormatService docValuesFormatService; private final MapperService mapperService; private final ImmutableMap codecs; - private volatile boolean loadBloomFilter = true; - public final static String DEFAULT_CODEC = "default"; public CodecService(Index index) { @@ -83,7 +78,6 @@ public class CodecService extends AbstractIndexComponent { codecs.put(codec, Codec.forName(codec)); } this.codecs = codecs.immutableMap(); - this.loadBloomFilter = indexSettings.getAsBoolean(INDEX_CODEC_BLOOM_LOAD, INDEX_CODEC_BLOOM_LOAD_DEFAULT); } public PostingsFormatService postingsFormatService() { @@ -105,12 +99,4 @@ public class CodecService extends AbstractIndexComponent { } return codec; } - - public boolean isLoadBloomFilter() { - return this.loadBloomFilter; - } - - public void setLoadBloomFilter(boolean loadBloomFilter) { - this.loadBloomFilter = loadBloomFilter; - } } diff --git a/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java b/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java index 3e91b9c1418..c87565886b5 100644 --- a/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java +++ b/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormat.java @@ -24,8 +24,6 @@ import org.apache.lucene.index.*; import org.apache.lucene.store.*; import org.apache.lucene.util.*; import org.elasticsearch.common.util.BloomFilter; -import org.elasticsearch.index.store.DirectoryUtils; -import org.elasticsearch.index.store.Store; import java.io.IOException; import java.util.*; @@ -42,7 +40,9 @@ import java.util.Map.Entry; * This is a special bloom filter version, based on {@link org.elasticsearch.common.util.BloomFilter} and inspired * by Lucene {@link org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat}. *

+ * @deprecated only for reading old segments */ +@Deprecated public final class BloomFilterPostingsFormat extends PostingsFormat { public static final String BLOOM_CODEC_NAME = "XBloomFilter"; // the Lucene one is named BloomFilter @@ -160,30 +160,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat { // // Load the hash function used in the BloomFilter // hashFunction = HashFunction.forName(bloomIn.readString()); // Load the delegate postings format - final String delegatePostings = bloomIn - .readString(); - int numBlooms = bloomIn.readInt(); - - boolean load = false; - Store.StoreDirectory storeDir = DirectoryUtils.getStoreDirectory(state.directory); - if (storeDir != null && storeDir.codecService() != null) { - load = storeDir.codecService().isLoadBloomFilter(); - } - - if (load) { - for (int i = 0; i < numBlooms; i++) { - int fieldNum = bloomIn.readInt(); - FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum); - LazyBloomLoader loader = new LazyBloomLoader(bloomIn.getFilePointer(), dataInput); - bloomsByFieldName.put(fieldInfo.name, loader); - BloomFilter.skipBloom(bloomIn); - } - if (version >= BLOOM_CODEC_VERSION_CHECKSUM) { - CodecUtil.checkFooter(bloomIn); - } else { - CodecUtil.checkEOF(bloomIn); - } - } + final String delegatePostings = bloomIn.readString(); this.delegateFieldsProducer = PostingsFormat.forName(delegatePostings) .fieldsProducer(state); this.data = dataInput; @@ -383,8 +360,9 @@ public final class BloomFilterPostingsFormat extends PostingsFormat { } - - final class BloomFilteredFieldsConsumer extends FieldsConsumer { + // TODO: would be great to move this out to test code, but the interaction between es090 and bloom is complex + // at least it is not accessible via SPI + public final class BloomFilteredFieldsConsumer extends FieldsConsumer { private FieldsConsumer delegateFieldsConsumer; private Map bloomFilters = new HashMap<>(); private SegmentWriteState state; @@ -399,7 +377,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat { } // for internal use only - FieldsConsumer getDelegate() { + public FieldsConsumer getDelegate() { return delegateFieldsConsumer; } diff --git a/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormatProvider.java b/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormatProvider.java index 1532b8f6a6d..df4c41396c1 100644 --- a/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormatProvider.java +++ b/src/main/java/org/elasticsearch/index/codec/postingsformat/BloomFilterPostingsFormatProvider.java @@ -30,7 +30,9 @@ import org.elasticsearch.index.settings.IndexSettings; import java.util.Map; /** + * @deprecated only for reading old segments */ +@Deprecated public class BloomFilterPostingsFormatProvider extends AbstractPostingsFormatProvider { private final PostingsFormatProvider delegate; diff --git a/src/main/java/org/elasticsearch/index/codec/postingsformat/Elasticsearch090PostingsFormat.java b/src/main/java/org/elasticsearch/index/codec/postingsformat/Elasticsearch090PostingsFormat.java index b533cebd7f8..b4b4d907ce0 100644 --- a/src/main/java/org/elasticsearch/index/codec/postingsformat/Elasticsearch090PostingsFormat.java +++ b/src/main/java/org/elasticsearch/index/codec/postingsformat/Elasticsearch090PostingsFormat.java @@ -38,14 +38,17 @@ import java.io.IOException; import java.util.Iterator; /** - * This is the default postings format for Elasticsearch that special cases + * This is the old default postings format for Elasticsearch that special cases * the _uid field to use a bloom filter while all other fields * will use a {@link Lucene50PostingsFormat}. This format will reuse the underlying * {@link Lucene50PostingsFormat} and its files also for the _uid saving up to * 5 files per segment in the default case. + *

+ * @deprecated only for reading old segments */ -public final class Elasticsearch090PostingsFormat extends PostingsFormat { - private final BloomFilterPostingsFormat bloomPostings; +@Deprecated +public class Elasticsearch090PostingsFormat extends PostingsFormat { + protected final BloomFilterPostingsFormat bloomPostings; public Elasticsearch090PostingsFormat() { super("es090"); @@ -57,7 +60,7 @@ public final class Elasticsearch090PostingsFormat extends PostingsFormat { public PostingsFormat getDefaultWrapped() { return bloomPostings.getDelegate(); } - private static final Predicate UID_FIELD_FILTER = new Predicate() { + protected static final Predicate UID_FIELD_FILTER = new Predicate() { @Override public boolean apply(String s) { @@ -67,34 +70,7 @@ public final class Elasticsearch090PostingsFormat extends PostingsFormat { @Override public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - final BloomFilteredFieldsConsumer fieldsConsumer = bloomPostings.fieldsConsumer(state); - return new FieldsConsumer() { - - @Override - public void write(Fields fields) throws IOException { - - Fields maskedFields = new FilterLeafReader.FilterFields(fields) { - @Override - public Iterator iterator() { - return Iterators.filter(this.in.iterator(), Predicates.not(UID_FIELD_FILTER)); - } - }; - fieldsConsumer.getDelegate().write(maskedFields); - maskedFields = new FilterLeafReader.FilterFields(fields) { - @Override - public Iterator iterator() { - return Iterators.singletonIterator(UidFieldMapper.NAME); - } - }; - // only go through bloom for the UID field - fieldsConsumer.write(maskedFields); - } - - @Override - public void close() throws IOException { - fieldsConsumer.close(); - } - }; + throw new UnsupportedOperationException("this codec can only be used for reading"); } @Override diff --git a/src/main/java/org/elasticsearch/index/codec/postingsformat/PostingFormats.java b/src/main/java/org/elasticsearch/index/codec/postingsformat/PostingFormats.java index c60ebee2493..b65cee5ae40 100644 --- a/src/main/java/org/elasticsearch/index/codec/postingsformat/PostingFormats.java +++ b/src/main/java/org/elasticsearch/index/codec/postingsformat/PostingFormats.java @@ -30,10 +30,7 @@ import org.elasticsearch.common.util.BloomFilter; * This class represents the set of Elasticsearch "built-in" * {@link PostingsFormatProvider.Factory postings format factories} *

    - *
  • bloom_default: a postings format that uses a bloom filter to - * improve term lookup performance. This is useful for primarily keys or fields - * that are used as a delete key
  • - *
  • default: the default Elasticsearch postings format offering best + *
  • default: the default Lucene postings format offering best * general purpose performance. This format is used if no postings format is * specified in the field mapping.
  • *
  • ***: other formats from Lucene core (e.g. Lucene41 as of Lucene 4.10) @@ -51,12 +48,10 @@ public class PostingFormats { for (String luceneName : PostingsFormat.availablePostingsFormats()) { builtInPostingFormatsX.put(luceneName, new PreBuiltPostingsFormatProvider.Factory(PostingsFormat.forName(luceneName))); } - final PostingsFormat defaultFormat = new Elasticsearch090PostingsFormat(); + final PostingsFormat defaultFormat = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT); builtInPostingFormatsX.put(PostingsFormatService.DEFAULT_FORMAT, new PreBuiltPostingsFormatProvider.Factory(PostingsFormatService.DEFAULT_FORMAT, defaultFormat)); - builtInPostingFormatsX.put("bloom_default", new PreBuiltPostingsFormatProvider.Factory("bloom_default", wrapInBloom(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT)))); - builtInPostingFormats = builtInPostingFormatsX.immutableMap(); } diff --git a/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java b/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java index 373de60ba01..0b40d746806 100644 --- a/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java +++ b/src/main/java/org/elasticsearch/index/engine/internal/InternalEngine.java @@ -1522,12 +1522,10 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin int indexConcurrency = settings.getAsInt(INDEX_INDEX_CONCURRENCY, InternalEngine.this.indexConcurrency); boolean failOnMergeFailure = settings.getAsBoolean(INDEX_FAIL_ON_MERGE_FAILURE, InternalEngine.this.failOnMergeFailure); String codecName = settings.get(INDEX_CODEC, InternalEngine.this.codecName); - final boolean codecBloomLoad = settings.getAsBoolean(CodecService.INDEX_CODEC_BLOOM_LOAD, codecService.isLoadBloomFilter()); boolean requiresFlushing = false; if (indexConcurrency != InternalEngine.this.indexConcurrency || !codecName.equals(InternalEngine.this.codecName) || - failOnMergeFailure != InternalEngine.this.failOnMergeFailure || - codecBloomLoad != codecService.isLoadBloomFilter()) { + failOnMergeFailure != InternalEngine.this.failOnMergeFailure) { try (InternalLock _ = readLock.acquire()) { if (indexConcurrency != InternalEngine.this.indexConcurrency) { logger.info("updating index.index_concurrency from [{}] to [{}]", InternalEngine.this.indexConcurrency, indexConcurrency); @@ -1545,12 +1543,6 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin logger.info("updating {} from [{}] to [{}]", InternalEngine.INDEX_FAIL_ON_MERGE_FAILURE, InternalEngine.this.failOnMergeFailure, failOnMergeFailure); InternalEngine.this.failOnMergeFailure = failOnMergeFailure; } - if (codecBloomLoad != codecService.isLoadBloomFilter()) { - logger.info("updating {} from [{}] to [{}]", CodecService.INDEX_CODEC_BLOOM_LOAD, codecService.isLoadBloomFilter(), codecBloomLoad); - codecService.setLoadBloomFilter(codecBloomLoad); - // we need to flush in this case, to load/unload the bloom filters - requiresFlushing = true; - } } if (requiresFlushing) { flush(new Flush().type(Flush.Type.NEW_WRITER)); diff --git a/src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java b/src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java index 98dffa5fe99..de7c666fee8 100644 --- a/src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java +++ b/src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java @@ -85,7 +85,6 @@ public class IndexDynamicSettingsModule extends AbstractModule { indexDynamicSettings.addDynamicSetting(LogDocMergePolicyProvider.INDEX_COMPOUND_FORMAT); indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_INDEX_CONCURRENCY, Validator.NON_NEGATIVE_INTEGER); indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_COMPOUND_ON_FLUSH, Validator.BOOLEAN); - indexDynamicSettings.addDynamicSetting(CodecService.INDEX_CODEC_BLOOM_LOAD, Validator.BOOLEAN); indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_GC_DELETES, Validator.TIME); indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_CODEC); indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_FAIL_ON_MERGE_FAILURE); diff --git a/src/main/java/org/elasticsearch/index/store/Store.java b/src/main/java/org/elasticsearch/index/store/Store.java index 30af8cd650e..51c51907e93 100644 --- a/src/main/java/org/elasticsearch/index/store/Store.java +++ b/src/main/java/org/elasticsearch/index/store/Store.java @@ -550,12 +550,6 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex return Store.this.shardId(); } - @Nullable - public CodecService codecService() { - ensureOpen(); - return Store.this.codecService; - } - @Override public void close() throws IOException { assert false : "Nobody should close this directory except of the Store itself"; diff --git a/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index 38e980a1b67..20debad9cac 100644 --- a/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ b/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -1,3 +1,2 @@ -org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat org.elasticsearch.search.suggest.completion.Completion090PostingsFormat diff --git a/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/src/test/java/org/elasticsearch/index/codec/CodecTests.java index f05832e90b9..b15c73d2843 100644 --- a/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.codec; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat; import org.apache.lucene.codecs.lucene40.Lucene40Codec; import org.apache.lucene.codecs.lucene41.Lucene41Codec; @@ -33,6 +34,7 @@ import org.apache.lucene.codecs.lucene49.Lucene49Codec; import org.apache.lucene.codecs.lucene50.Lucene50Codec; import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; @@ -80,25 +82,16 @@ public class CodecTests extends ElasticsearchSingleNodeLuceneTestCase { public void testResolveDefaultPostingFormats() throws Exception { PostingsFormatService postingsFormatService = createCodecService().postingsFormatService(); assertThat(postingsFormatService.get("default"), instanceOf(PreBuiltPostingsFormatProvider.class)); - assertThat(postingsFormatService.get("default").get(), instanceOf(Elasticsearch090PostingsFormat.class)); + PostingsFormat luceneDefault = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT); + assertThat(postingsFormatService.get("default").get(), instanceOf(luceneDefault.getClass())); // Should fail when upgrading Lucene with codec changes - assertThat(((Elasticsearch090PostingsFormat)postingsFormatService.get("default").get()).getDefaultWrapped(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField("").getClass())); assertThat(postingsFormatService.get("Lucene41"), instanceOf(PreBuiltPostingsFormatProvider.class)); // Should fail when upgrading Lucene with codec changes assertThat(postingsFormatService.get("Lucene50").get(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField(null).getClass())); - assertThat(postingsFormatService.get("bloom_default"), instanceOf(PreBuiltPostingsFormatProvider.class)); - if (PostingFormats.luceneBloomFilter) { - assertThat(postingsFormatService.get("bloom_default").get(), instanceOf(BloomFilteringPostingsFormat.class)); - } else { - assertThat(postingsFormatService.get("bloom_default").get(), instanceOf(BloomFilterPostingsFormat.class)); - } assertThat(postingsFormatService.get("BloomFilter"), instanceOf(PreBuiltPostingsFormatProvider.class)); assertThat(postingsFormatService.get("BloomFilter").get(), instanceOf(BloomFilteringPostingsFormat.class)); - - assertThat(postingsFormatService.get("XBloomFilter"), instanceOf(PreBuiltPostingsFormatProvider.class)); - assertThat(postingsFormatService.get("XBloomFilter").get(), instanceOf(BloomFilterPostingsFormat.class)); } @Test @@ -128,7 +121,8 @@ public class CodecTests extends ElasticsearchSingleNodeLuceneTestCase { CodecService codecService = createCodecService(indexSettings); DocumentMapper documentMapper = codecService.mapperService().documentMapperParser().parse(mapping); assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider(), instanceOf(PreBuiltPostingsFormatProvider.class)); - assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(Elasticsearch090PostingsFormat.class)); + PostingsFormat luceneDefault = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT); + assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(luceneDefault.getClass())); assertThat(documentMapper.mappers().name("field2").mapper().postingsFormatProvider(), instanceOf(DefaultPostingsFormatProvider.class)); DefaultPostingsFormatProvider provider = (DefaultPostingsFormatProvider) documentMapper.mappers().name("field2").mapper().postingsFormatProvider(); diff --git a/src/test/java/org/elasticsearch/index/codec/postingformat/DefaultPostingsFormatTests.java b/src/test/java/org/elasticsearch/index/codec/postingformat/DefaultPostingsFormatTests.java deleted file mode 100644 index 019f10b4d6d..00000000000 --- a/src/test/java/org/elasticsearch/index/codec/postingformat/DefaultPostingsFormatTests.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.codec.postingformat; - -import org.apache.lucene.analysis.core.WhitespaceAnalyzer; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene50.Lucene50Codec; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.*; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMDirectory; -import org.elasticsearch.common.lucene.Lucene; -import org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat; -import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat; -import org.elasticsearch.index.mapper.internal.UidFieldMapper; -import org.elasticsearch.test.ElasticsearchTestCase; -import org.junit.Test; - -import java.io.IOException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import static org.hamcrest.Matchers.*; - -/** - * Simple smoke test for {@link org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat} - */ -public class DefaultPostingsFormatTests extends ElasticsearchTestCase { - - private final class TestCodec extends Lucene50Codec { - - @Override - public PostingsFormat getPostingsFormatForField(String field) { - return new Elasticsearch090PostingsFormat(); - } - } - - @Test - public void testUseDefault() throws IOException { - - Codec codec = new TestCodec(); - Directory d = new RAMDirectory(); - IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer()); - config.setCodec(codec); - IndexWriter writer = new IndexWriter(d, config); - writer.addDocument(Arrays.asList(new TextField("foo", "bar", Store.YES), new TextField(UidFieldMapper.NAME, "1234", Store.YES))); - writer.commit(); - DirectoryReader reader = DirectoryReader.open(writer, false); - List leaves = reader.leaves(); - assertThat(leaves.size(), equalTo(1)); - LeafReader ar = leaves.get(0).reader(); - Terms terms = ar.terms("foo"); - Terms uidTerms = ar.terms(UidFieldMapper.NAME); - - assertThat(terms.size(), equalTo(1l)); - assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class))); - assertThat(uidTerms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class))); - - reader.close(); - writer.close(); - d.close(); - } - - @Test - public void testNoUIDField() throws IOException { - - Codec codec = new TestCodec(); - Directory d = new RAMDirectory(); - IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer()); - config.setCodec(codec); - IndexWriter writer = new IndexWriter(d, config); - for (int i = 0; i < 100; i++) { - writer.addDocument(Arrays.asList(new TextField("foo", "foo bar foo bar", Store.YES), new TextField("some_other_field", "1234", Store.YES))); - } - writer.forceMerge(1, true); - writer.commit(); - - DirectoryReader reader = DirectoryReader.open(writer, false); - List leaves = reader.leaves(); - assertThat(leaves.size(), equalTo(1)); - LeafReader ar = leaves.get(0).reader(); - Terms terms = ar.terms("foo"); - Terms some_other_field = ar.terms("some_other_field"); - - assertThat(terms.size(), equalTo(2l)); - assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class))); - assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class))); - TermsEnum iterator = terms.iterator(null); - Set expected = new HashSet<>(); - expected.add("foo"); - expected.add("bar"); - while(iterator.next() != null) { - expected.remove(iterator.term().utf8ToString()); - } - assertThat(expected.size(), equalTo(0)); - reader.close(); - writer.close(); - d.close(); - } - -} diff --git a/src/test/java/org/elasticsearch/index/codec/postingformat/Elasticsearch090RWPostingsFormat.java b/src/test/java/org/elasticsearch/index/codec/postingformat/Elasticsearch090RWPostingsFormat.java new file mode 100644 index 00000000000..39ba13f363c --- /dev/null +++ b/src/test/java/org/elasticsearch/index/codec/postingformat/Elasticsearch090RWPostingsFormat.java @@ -0,0 +1,69 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.codec.postingformat; + +import com.google.common.base.Predicates; +import com.google.common.collect.Iterators; + +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FilterLeafReader; +import org.apache.lucene.index.SegmentWriteState; +import org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat.BloomFilteredFieldsConsumer; +import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat; +import org.elasticsearch.index.mapper.internal.UidFieldMapper; + +import java.io.IOException; +import java.util.Iterator; + +/** read-write version with blooms for testing */ +public class Elasticsearch090RWPostingsFormat extends Elasticsearch090PostingsFormat { + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + final BloomFilteredFieldsConsumer fieldsConsumer = bloomPostings.fieldsConsumer(state); + return new FieldsConsumer() { + + @Override + public void write(Fields fields) throws IOException { + + Fields maskedFields = new FilterLeafReader.FilterFields(fields) { + @Override + public Iterator iterator() { + return Iterators.filter(this.in.iterator(), Predicates.not(UID_FIELD_FILTER)); + } + }; + fieldsConsumer.getDelegate().write(maskedFields); + maskedFields = new FilterLeafReader.FilterFields(fields) { + @Override + public Iterator iterator() { + return Iterators.singletonIterator(UidFieldMapper.NAME); + } + }; + // only go through bloom for the UID field + fieldsConsumer.write(maskedFields); + } + + @Override + public void close() throws IOException { + fieldsConsumer.close(); + } + }; + } +} diff --git a/src/test/java/org/elasticsearch/index/codec/postingformat/ElasticsearchPostingsFormatTest.java b/src/test/java/org/elasticsearch/index/codec/postingformat/ElasticsearchPostingsFormatTest.java index b564d0d8a08..d015aead771 100644 --- a/src/test/java/org/elasticsearch/index/codec/postingformat/ElasticsearchPostingsFormatTest.java +++ b/src/test/java/org/elasticsearch/index/codec/postingformat/ElasticsearchPostingsFormatTest.java @@ -47,9 +47,7 @@ public class ElasticsearchPostingsFormatTest extends BasePostingsFormatTestCase @Override protected Codec getCodec() { - return random().nextBoolean() ? - TestUtil.alwaysPostingsFormat(new Elasticsearch090PostingsFormat()) - : TestUtil.alwaysPostingsFormat(new BloomFilterPostingsFormat(PostingsFormat.forName("Lucene50"), BloomFilter.Factory.DEFAULT)); + return TestUtil.alwaysPostingsFormat(new Elasticsearch090RWPostingsFormat()); } } diff --git a/src/test/java/org/elasticsearch/index/engine/internal/InternalEngineIntegrationTest.java b/src/test/java/org/elasticsearch/index/engine/internal/InternalEngineIntegrationTest.java index 958a1d12e57..e325a46494f 100644 --- a/src/test/java/org/elasticsearch/index/engine/internal/InternalEngineIntegrationTest.java +++ b/src/test/java/org/elasticsearch/index/engine/internal/InternalEngineIntegrationTest.java @@ -19,19 +19,12 @@ package org.elasticsearch.index.engine.internal; -import com.google.common.base.Predicate; -import org.apache.lucene.util.LuceneTestCase.Slow; import org.elasticsearch.action.admin.indices.segments.IndexSegments; import org.elasticsearch.action.admin.indices.segments.IndexShardSegments; import org.elasticsearch.action.admin.indices.segments.IndicesSegmentResponse; import org.elasticsearch.action.admin.indices.segments.ShardSegments; -import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; -import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.common.util.BloomFilter; -import org.elasticsearch.index.codec.CodecService; import org.elasticsearch.index.engine.Segment; -import org.elasticsearch.index.merge.policy.AbstractMergePolicyProvider; import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.hamcrest.Matchers; import org.junit.Test; @@ -39,85 +32,9 @@ import org.junit.Test; import java.util.Collection; import java.util.HashSet; import java.util.Set; -import java.util.concurrent.ExecutionException; - -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; public class InternalEngineIntegrationTest extends ElasticsearchIntegrationTest { - @Test - @Slow - public void testSettingLoadBloomFilterDefaultTrue() throws Exception { - client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1)).get(); - client().prepareIndex("test", "foo").setSource("field", "foo").get(); - ensureGreen(); - refresh(); - IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get(); - final long segmentsMemoryWithBloom = stats.getTotal().getSegments().getMemoryInBytes(); - logger.info("segments with bloom: {}", segmentsMemoryWithBloom); - - logger.info("updating the setting to unload bloom filters"); - client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get(); - logger.info("waiting for memory to match without blooms"); - awaitBusy(new Predicate() { - public boolean apply(Object o) { - IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get(); - long segmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes(); - logger.info("trying segments without bloom: {}", segmentsMemoryWithoutBloom); - return segmentsMemoryWithoutBloom == (segmentsMemoryWithBloom - BloomFilter.Factory.DEFAULT.createFilter(1).getSizeInBytes()); - } - }); - - logger.info("updating the setting to load bloom filters"); - client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, true)).get(); - logger.info("waiting for memory to match with blooms"); - awaitBusy(new Predicate() { - public boolean apply(Object o) { - IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get(); - long newSegmentsMemoryWithBloom = stats.getTotal().getSegments().getMemoryInBytes(); - logger.info("trying segments with bloom: {}", newSegmentsMemoryWithBloom); - return newSegmentsMemoryWithBloom == segmentsMemoryWithBloom; - } - }); - } - - @Test - @Slow - public void testSettingLoadBloomFilterDefaultFalse() throws Exception { - client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1).put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get(); - client().prepareIndex("test", "foo").setSource("field", "foo").get(); - ensureGreen(); - refresh(); - - IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get(); - final long segmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes(); - logger.info("segments without bloom: {}", segmentsMemoryWithoutBloom); - - logger.info("updating the setting to load bloom filters"); - client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, true)).get(); - logger.info("waiting for memory to match with blooms"); - awaitBusy(new Predicate() { - public boolean apply(Object o) { - IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get(); - long segmentsMemoryWithBloom = stats.getTotal().getSegments().getMemoryInBytes(); - logger.info("trying segments with bloom: {}", segmentsMemoryWithoutBloom); - return segmentsMemoryWithoutBloom == (segmentsMemoryWithBloom - BloomFilter.Factory.DEFAULT.createFilter(1).getSizeInBytes()); - } - }); - - logger.info("updating the setting to unload bloom filters"); - client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get(); - logger.info("waiting for memory to match without blooms"); - awaitBusy(new Predicate() { - public boolean apply(Object o) { - IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get(); - long newSegmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes(); - logger.info("trying segments without bloom: {}", newSegmentsMemoryWithoutBloom); - return newSegmentsMemoryWithoutBloom == segmentsMemoryWithoutBloom; - } - }); - } - @Test public void testSetIndexCompoundOnFlush() { client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1)).get(); diff --git a/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java b/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java index e7d11ffe0f8..d6e7a5e9dfc 100644 --- a/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java +++ b/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java @@ -36,6 +36,7 @@ import org.apache.lucene.store.*; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LineFileDocs; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat; import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; @@ -70,7 +71,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase { IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT); LookupFactory load = currentProvider.load(input); - PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat()); + PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT)); NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer()); Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING), new CompletionSuggestionContext(null)); List result = lookup.lookup("ge", false, 10); @@ -214,7 +215,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase { iter = primaryIter; } reference.build(iter); - PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat()); + PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT)); NamedAnalyzer namedAnalzyer = new NamedAnalyzer("foo", new StandardAnalyzer()); final CompletionFieldMapper mapper = new CompletionFieldMapper(new Names("foo"), namedAnalzyer, namedAnalzyer, provider, null, usePayloads, diff --git a/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java b/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java index 9af568c751b..e283ecffa7f 100644 --- a/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java +++ b/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java @@ -466,9 +466,6 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase builder.put(FsTranslog.INDEX_TRANSLOG_FS_TYPE, RandomPicks.randomFrom(random, FsTranslogFile.Type.values()).name()); } - // Randomly load or don't load bloom filters: - builder.put(CodecService.INDEX_CODEC_BLOOM_LOAD, random.nextBoolean()); - if (random.nextBoolean()) { builder.put(IndicesQueryCache.INDEX_CACHE_QUERY_ENABLED, random.nextBoolean()); }