Disable bloom filters.
Make the "es090" postings format read-only, so that it exists only to support reading old segments. A test-only subclass adds write capability for testing. Closes #8571
This commit is contained in:
parent
3e1b7c7a34
commit
9ef69f9f36
|
@ -57,10 +57,6 @@ settings API:
|
|||
`index.index_concurrency`::
|
||||
Defaults to `8`.
|
||||
|
||||
`index.codec.bloom.load`::
|
||||
Whether to load the bloom filter. Defaults to `false`.
|
||||
See <<codec-bloom-load>>.
|
||||
|
||||
`index.fail_on_merge_failure`::
|
||||
Defaults to `true`.
|
||||
|
||||
|
@ -227,35 +223,3 @@ curl -XPUT 'localhost:9200/myindex/_settings' -d '{
|
|||
|
||||
curl -XPOST 'localhost:9200/myindex/_open'
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
[[codec-bloom-load]]
|
||||
=== Bloom filters
|
||||
|
||||
Up to version 1.3, Elasticsearch used to generate bloom filters for the `_uid`
|
||||
field at indexing time and to load them at search time in order to speed-up
|
||||
primary-key lookups by saving disk seeks.
|
||||
|
||||
As of 1.4, bloom filters are still generated at indexing time, but they are
|
||||
no longer loaded at search time by default: they consume RAM in proportion to
|
||||
the number of unique terms, which can quickly add up for certain use cases,
|
||||
and separate performance improvements have made the performance gains with
|
||||
bloom filters very small.
|
||||
|
||||
[TIP]
|
||||
==================================================
|
||||
|
||||
You can enable loading of the bloom filter at search time on a
|
||||
per-index basis by updating the index settings:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT /old_index/_settings?index.codec.bloom.load=true
|
||||
--------------------------------------------------
|
||||
|
||||
This setting, which defaults to `false`, can be updated on a live index. Note,
|
||||
however, that changing the value will cause the index to be reopened, which
|
||||
will invalidate any existing caches.
|
||||
|
||||
==================================================
|
||||
|
||||
|
|
|
@ -44,16 +44,11 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
*/
|
||||
public class CodecService extends AbstractIndexComponent {
|
||||
|
||||
public static final String INDEX_CODEC_BLOOM_LOAD = "index.codec.bloom.load";
|
||||
public static final boolean INDEX_CODEC_BLOOM_LOAD_DEFAULT = false;
|
||||
|
||||
private final PostingsFormatService postingsFormatService;
|
||||
private final DocValuesFormatService docValuesFormatService;
|
||||
private final MapperService mapperService;
|
||||
private final ImmutableMap<String, Codec> codecs;
|
||||
|
||||
private volatile boolean loadBloomFilter = true;
|
||||
|
||||
public final static String DEFAULT_CODEC = "default";
|
||||
|
||||
public CodecService(Index index) {
|
||||
|
@ -83,7 +78,6 @@ public class CodecService extends AbstractIndexComponent {
|
|||
codecs.put(codec, Codec.forName(codec));
|
||||
}
|
||||
this.codecs = codecs.immutableMap();
|
||||
this.loadBloomFilter = indexSettings.getAsBoolean(INDEX_CODEC_BLOOM_LOAD, INDEX_CODEC_BLOOM_LOAD_DEFAULT);
|
||||
}
|
||||
|
||||
public PostingsFormatService postingsFormatService() {
|
||||
|
@ -105,12 +99,4 @@ public class CodecService extends AbstractIndexComponent {
|
|||
}
|
||||
return codec;
|
||||
}
|
||||
|
||||
public boolean isLoadBloomFilter() {
|
||||
return this.loadBloomFilter;
|
||||
}
|
||||
|
||||
public void setLoadBloomFilter(boolean loadBloomFilter) {
|
||||
this.loadBloomFilter = loadBloomFilter;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,8 +24,6 @@ import org.apache.lucene.index.*;
|
|||
import org.apache.lucene.store.*;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.elasticsearch.common.util.BloomFilter;
|
||||
import org.elasticsearch.index.store.DirectoryUtils;
|
||||
import org.elasticsearch.index.store.Store;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
@ -42,7 +40,9 @@ import java.util.Map.Entry;
|
|||
* This is a special bloom filter version, based on {@link org.elasticsearch.common.util.BloomFilter} and inspired
|
||||
* by Lucene {@link org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat}.
|
||||
* </p>
|
||||
* @deprecated only for reading old segments
|
||||
*/
|
||||
@Deprecated
|
||||
public final class BloomFilterPostingsFormat extends PostingsFormat {
|
||||
|
||||
public static final String BLOOM_CODEC_NAME = "XBloomFilter"; // the Lucene one is named BloomFilter
|
||||
|
@ -160,30 +160,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
|
|||
// // Load the hash function used in the BloomFilter
|
||||
// hashFunction = HashFunction.forName(bloomIn.readString());
|
||||
// Load the delegate postings format
|
||||
final String delegatePostings = bloomIn
|
||||
.readString();
|
||||
int numBlooms = bloomIn.readInt();
|
||||
|
||||
boolean load = false;
|
||||
Store.StoreDirectory storeDir = DirectoryUtils.getStoreDirectory(state.directory);
|
||||
if (storeDir != null && storeDir.codecService() != null) {
|
||||
load = storeDir.codecService().isLoadBloomFilter();
|
||||
}
|
||||
|
||||
if (load) {
|
||||
for (int i = 0; i < numBlooms; i++) {
|
||||
int fieldNum = bloomIn.readInt();
|
||||
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
|
||||
LazyBloomLoader loader = new LazyBloomLoader(bloomIn.getFilePointer(), dataInput);
|
||||
bloomsByFieldName.put(fieldInfo.name, loader);
|
||||
BloomFilter.skipBloom(bloomIn);
|
||||
}
|
||||
if (version >= BLOOM_CODEC_VERSION_CHECKSUM) {
|
||||
CodecUtil.checkFooter(bloomIn);
|
||||
} else {
|
||||
CodecUtil.checkEOF(bloomIn);
|
||||
}
|
||||
}
|
||||
final String delegatePostings = bloomIn.readString();
|
||||
this.delegateFieldsProducer = PostingsFormat.forName(delegatePostings)
|
||||
.fieldsProducer(state);
|
||||
this.data = dataInput;
|
||||
|
@ -383,8 +360,9 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
|
|||
|
||||
}
|
||||
|
||||
|
||||
final class BloomFilteredFieldsConsumer extends FieldsConsumer {
|
||||
// TODO: would be great to move this out to test code, but the interaction between es090 and bloom is complex
|
||||
// at least it is not accessible via SPI
|
||||
public final class BloomFilteredFieldsConsumer extends FieldsConsumer {
|
||||
private FieldsConsumer delegateFieldsConsumer;
|
||||
private Map<FieldInfo, BloomFilter> bloomFilters = new HashMap<>();
|
||||
private SegmentWriteState state;
|
||||
|
@ -399,7 +377,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
// for internal use only
|
||||
FieldsConsumer getDelegate() {
|
||||
public FieldsConsumer getDelegate() {
|
||||
return delegateFieldsConsumer;
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,9 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @deprecated only for reading old segments
|
||||
*/
|
||||
@Deprecated
|
||||
public class BloomFilterPostingsFormatProvider extends AbstractPostingsFormatProvider {
|
||||
|
||||
private final PostingsFormatProvider delegate;
|
||||
|
|
|
@ -38,14 +38,17 @@ import java.io.IOException;
|
|||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* This is the default postings format for Elasticsearch that special cases
|
||||
* This is the old default postings format for Elasticsearch that special cases
|
||||
* the <tt>_uid</tt> field to use a bloom filter while all other fields
|
||||
* will use a {@link Lucene50PostingsFormat}. This format will reuse the underlying
|
||||
* {@link Lucene50PostingsFormat} and its files also for the <tt>_uid</tt> saving up to
|
||||
* 5 files per segment in the default case.
|
||||
* <p>
|
||||
* @deprecated only for reading old segments
|
||||
*/
|
||||
public final class Elasticsearch090PostingsFormat extends PostingsFormat {
|
||||
private final BloomFilterPostingsFormat bloomPostings;
|
||||
@Deprecated
|
||||
public class Elasticsearch090PostingsFormat extends PostingsFormat {
|
||||
protected final BloomFilterPostingsFormat bloomPostings;
|
||||
|
||||
public Elasticsearch090PostingsFormat() {
|
||||
super("es090");
|
||||
|
@ -57,7 +60,7 @@ public final class Elasticsearch090PostingsFormat extends PostingsFormat {
|
|||
public PostingsFormat getDefaultWrapped() {
|
||||
return bloomPostings.getDelegate();
|
||||
}
|
||||
private static final Predicate<String> UID_FIELD_FILTER = new Predicate<String>() {
|
||||
protected static final Predicate<String> UID_FIELD_FILTER = new Predicate<String>() {
|
||||
|
||||
@Override
|
||||
public boolean apply(String s) {
|
||||
|
@ -67,34 +70,7 @@ public final class Elasticsearch090PostingsFormat extends PostingsFormat {
|
|||
|
||||
@Override
|
||||
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
final BloomFilteredFieldsConsumer fieldsConsumer = bloomPostings.fieldsConsumer(state);
|
||||
return new FieldsConsumer() {
|
||||
|
||||
@Override
|
||||
public void write(Fields fields) throws IOException {
|
||||
|
||||
Fields maskedFields = new FilterLeafReader.FilterFields(fields) {
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return Iterators.filter(this.in.iterator(), Predicates.not(UID_FIELD_FILTER));
|
||||
}
|
||||
};
|
||||
fieldsConsumer.getDelegate().write(maskedFields);
|
||||
maskedFields = new FilterLeafReader.FilterFields(fields) {
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return Iterators.singletonIterator(UidFieldMapper.NAME);
|
||||
}
|
||||
};
|
||||
// only go through bloom for the UID field
|
||||
fieldsConsumer.write(maskedFields);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
fieldsConsumer.close();
|
||||
}
|
||||
};
|
||||
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -30,10 +30,7 @@ import org.elasticsearch.common.util.BloomFilter;
|
|||
* This class represents the set of Elasticsearch "built-in"
|
||||
* {@link PostingsFormatProvider.Factory postings format factories}
|
||||
* <ul>
|
||||
* <li><b>bloom_default</b>: a postings format that uses a bloom filter to
|
||||
* improve term lookup performance. This is useful for primary keys or fields
|
||||
* that are used as a delete key</li>
|
||||
* <li><b>default</b>: the default Elasticsearch postings format offering best
|
||||
* <li><b>default</b>: the default Lucene postings format offering best
|
||||
* general purpose performance. This format is used if no postings format is
|
||||
* specified in the field mapping.</li>
|
||||
* <li><b>***</b>: other formats from Lucene core (e.g. Lucene41 as of Lucene 4.10)
|
||||
|
@ -51,12 +48,10 @@ public class PostingFormats {
|
|||
for (String luceneName : PostingsFormat.availablePostingsFormats()) {
|
||||
builtInPostingFormatsX.put(luceneName, new PreBuiltPostingsFormatProvider.Factory(PostingsFormat.forName(luceneName)));
|
||||
}
|
||||
final PostingsFormat defaultFormat = new Elasticsearch090PostingsFormat();
|
||||
final PostingsFormat defaultFormat = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
|
||||
builtInPostingFormatsX.put(PostingsFormatService.DEFAULT_FORMAT,
|
||||
new PreBuiltPostingsFormatProvider.Factory(PostingsFormatService.DEFAULT_FORMAT, defaultFormat));
|
||||
|
||||
builtInPostingFormatsX.put("bloom_default", new PreBuiltPostingsFormatProvider.Factory("bloom_default", wrapInBloom(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT))));
|
||||
|
||||
builtInPostingFormats = builtInPostingFormatsX.immutableMap();
|
||||
}
|
||||
|
||||
|
|
|
@ -1522,12 +1522,10 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
|
|||
int indexConcurrency = settings.getAsInt(INDEX_INDEX_CONCURRENCY, InternalEngine.this.indexConcurrency);
|
||||
boolean failOnMergeFailure = settings.getAsBoolean(INDEX_FAIL_ON_MERGE_FAILURE, InternalEngine.this.failOnMergeFailure);
|
||||
String codecName = settings.get(INDEX_CODEC, InternalEngine.this.codecName);
|
||||
final boolean codecBloomLoad = settings.getAsBoolean(CodecService.INDEX_CODEC_BLOOM_LOAD, codecService.isLoadBloomFilter());
|
||||
boolean requiresFlushing = false;
|
||||
if (indexConcurrency != InternalEngine.this.indexConcurrency ||
|
||||
!codecName.equals(InternalEngine.this.codecName) ||
|
||||
failOnMergeFailure != InternalEngine.this.failOnMergeFailure ||
|
||||
codecBloomLoad != codecService.isLoadBloomFilter()) {
|
||||
failOnMergeFailure != InternalEngine.this.failOnMergeFailure) {
|
||||
try (InternalLock _ = readLock.acquire()) {
|
||||
if (indexConcurrency != InternalEngine.this.indexConcurrency) {
|
||||
logger.info("updating index.index_concurrency from [{}] to [{}]", InternalEngine.this.indexConcurrency, indexConcurrency);
|
||||
|
@ -1545,12 +1543,6 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
|
|||
logger.info("updating {} from [{}] to [{}]", InternalEngine.INDEX_FAIL_ON_MERGE_FAILURE, InternalEngine.this.failOnMergeFailure, failOnMergeFailure);
|
||||
InternalEngine.this.failOnMergeFailure = failOnMergeFailure;
|
||||
}
|
||||
if (codecBloomLoad != codecService.isLoadBloomFilter()) {
|
||||
logger.info("updating {} from [{}] to [{}]", CodecService.INDEX_CODEC_BLOOM_LOAD, codecService.isLoadBloomFilter(), codecBloomLoad);
|
||||
codecService.setLoadBloomFilter(codecBloomLoad);
|
||||
// we need to flush in this case, to load/unload the bloom filters
|
||||
requiresFlushing = true;
|
||||
}
|
||||
}
|
||||
if (requiresFlushing) {
|
||||
flush(new Flush().type(Flush.Type.NEW_WRITER));
|
||||
|
|
|
@ -85,7 +85,6 @@ public class IndexDynamicSettingsModule extends AbstractModule {
|
|||
indexDynamicSettings.addDynamicSetting(LogDocMergePolicyProvider.INDEX_COMPOUND_FORMAT);
|
||||
indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_INDEX_CONCURRENCY, Validator.NON_NEGATIVE_INTEGER);
|
||||
indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_COMPOUND_ON_FLUSH, Validator.BOOLEAN);
|
||||
indexDynamicSettings.addDynamicSetting(CodecService.INDEX_CODEC_BLOOM_LOAD, Validator.BOOLEAN);
|
||||
indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_GC_DELETES, Validator.TIME);
|
||||
indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_CODEC);
|
||||
indexDynamicSettings.addDynamicSetting(InternalEngine.INDEX_FAIL_ON_MERGE_FAILURE);
|
||||
|
|
|
@ -550,12 +550,6 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
|
|||
return Store.this.shardId();
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public CodecService codecService() {
|
||||
ensureOpen();
|
||||
return Store.this.codecService;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
assert false : "Nobody should close this directory except of the Store itself";
|
||||
|
|
|
@ -1,3 +1,2 @@
|
|||
org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat
|
||||
org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat
|
||||
org.elasticsearch.search.suggest.completion.Completion090PostingsFormat
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.elasticsearch.index.codec;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
|
||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||
|
@ -33,6 +34,7 @@ import org.apache.lucene.codecs.lucene49.Lucene49Codec;
|
|||
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
|
@ -80,25 +82,16 @@ public class CodecTests extends ElasticsearchSingleNodeLuceneTestCase {
|
|||
public void testResolveDefaultPostingFormats() throws Exception {
|
||||
PostingsFormatService postingsFormatService = createCodecService().postingsFormatService();
|
||||
assertThat(postingsFormatService.get("default"), instanceOf(PreBuiltPostingsFormatProvider.class));
|
||||
assertThat(postingsFormatService.get("default").get(), instanceOf(Elasticsearch090PostingsFormat.class));
|
||||
PostingsFormat luceneDefault = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
|
||||
assertThat(postingsFormatService.get("default").get(), instanceOf(luceneDefault.getClass()));
|
||||
|
||||
// Should fail when upgrading Lucene with codec changes
|
||||
assertThat(((Elasticsearch090PostingsFormat)postingsFormatService.get("default").get()).getDefaultWrapped(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField("").getClass()));
|
||||
assertThat(postingsFormatService.get("Lucene41"), instanceOf(PreBuiltPostingsFormatProvider.class));
|
||||
// Should fail when upgrading Lucene with codec changes
|
||||
assertThat(postingsFormatService.get("Lucene50").get(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField(null).getClass()));
|
||||
|
||||
assertThat(postingsFormatService.get("bloom_default"), instanceOf(PreBuiltPostingsFormatProvider.class));
|
||||
if (PostingFormats.luceneBloomFilter) {
|
||||
assertThat(postingsFormatService.get("bloom_default").get(), instanceOf(BloomFilteringPostingsFormat.class));
|
||||
} else {
|
||||
assertThat(postingsFormatService.get("bloom_default").get(), instanceOf(BloomFilterPostingsFormat.class));
|
||||
}
|
||||
assertThat(postingsFormatService.get("BloomFilter"), instanceOf(PreBuiltPostingsFormatProvider.class));
|
||||
assertThat(postingsFormatService.get("BloomFilter").get(), instanceOf(BloomFilteringPostingsFormat.class));
|
||||
|
||||
assertThat(postingsFormatService.get("XBloomFilter"), instanceOf(PreBuiltPostingsFormatProvider.class));
|
||||
assertThat(postingsFormatService.get("XBloomFilter").get(), instanceOf(BloomFilterPostingsFormat.class));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -128,7 +121,8 @@ public class CodecTests extends ElasticsearchSingleNodeLuceneTestCase {
|
|||
CodecService codecService = createCodecService(indexSettings);
|
||||
DocumentMapper documentMapper = codecService.mapperService().documentMapperParser().parse(mapping);
|
||||
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider(), instanceOf(PreBuiltPostingsFormatProvider.class));
|
||||
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(Elasticsearch090PostingsFormat.class));
|
||||
PostingsFormat luceneDefault = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);
|
||||
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(luceneDefault.getClass()));
|
||||
|
||||
assertThat(documentMapper.mappers().name("field2").mapper().postingsFormatProvider(), instanceOf(DefaultPostingsFormatProvider.class));
|
||||
DefaultPostingsFormatProvider provider = (DefaultPostingsFormatProvider) documentMapper.mappers().name("field2").mapper().postingsFormatProvider();
|
||||
|
|
|
@ -1,122 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.codec.postingformat;
|
||||
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat;
|
||||
import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
|
||||
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
/**
|
||||
* Simple smoke test for {@link org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat}
|
||||
* <p>
* Each test indexes documents through a codec that hands out that format for every field,
* opens a reader on the writer, and checks that the terms read back are not instances of
* {@code BloomFilterPostingsFormat.BloomFilteredTerms}.
*/
|
||||
public class DefaultPostingsFormatTests extends ElasticsearchTestCase {
|
||||

||||
/** Codec that returns a fresh {@link Elasticsearch090PostingsFormat} for every field, ignoring the field name. */
private final class TestCodec extends Lucene50Codec {
|
||||

||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
return new Elasticsearch090PostingsFormat();
|
||||
}
|
||||
}
|
||||

||||
/**
 * Indexes a single document that has both a regular field ("foo") and the uid field,
 * then verifies there is one segment, that "foo" has exactly one term, and that neither
 * field's terms are bloom-filtered.
 */
@Test
|
||||
public void testUseDefault() throws IOException {
|
||||

||||
Codec codec = new TestCodec();
|
||||
Directory d = new RAMDirectory();
|
||||
IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
|
||||
config.setCodec(codec);
|
||||
IndexWriter writer = new IndexWriter(d, config);
|
||||
// one document: a plain text field plus the uid field
writer.addDocument(Arrays.asList(new TextField("foo", "bar", Store.YES), new TextField(UidFieldMapper.NAME, "1234", Store.YES)));
|
||||
writer.commit();
|
||||
// reader is obtained from the still-open writer
DirectoryReader reader = DirectoryReader.open(writer, false);
|
||||
List<LeafReaderContext> leaves = reader.leaves();
|
||||
assertThat(leaves.size(), equalTo(1));
|
||||
LeafReader ar = leaves.get(0).reader();
|
||||
Terms terms = ar.terms("foo");
|
||||
Terms uidTerms = ar.terms(UidFieldMapper.NAME);
|
||||

||||
assertThat(terms.size(), equalTo(1l));
|
||||
// NOTE(review): the uid field is also expected to be unfiltered here, presumably because
// bloom filters are not loaded for a plain RAMDirectory — confirm against the format's reader.
assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
|
||||
assertThat(uidTerms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
|
||||

||||
reader.close();
|
||||
writer.close();
|
||||
d.close();
|
||||
}
|
||||

||||
/**
 * Indexes 100 identical documents that do not contain the uid field, force-merges them,
 * and verifies there is one segment, that "foo" has exactly the two terms "foo" and "bar",
 * and that neither field's terms are bloom-filtered.
 */
@Test
|
||||
public void testNoUIDField() throws IOException {
|
||||

||||
Codec codec = new TestCodec();
|
||||
Directory d = new RAMDirectory();
|
||||
IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer());
|
||||
config.setCodec(codec);
|
||||
IndexWriter writer = new IndexWriter(d, config);
|
||||
for (int i = 0; i < 100; i++) {
|
||||
writer.addDocument(Arrays.asList(new TextField("foo", "foo bar foo bar", Store.YES), new TextField("some_other_field", "1234", Store.YES)));
|
||||
}
|
||||
// merge down to one segment so the single-leaf assertion below holds
writer.forceMerge(1, true);
|
||||
writer.commit();
|
||||

||||
DirectoryReader reader = DirectoryReader.open(writer, false);
|
||||
List<LeafReaderContext> leaves = reader.leaves();
|
||||
assertThat(leaves.size(), equalTo(1));
|
||||
LeafReader ar = leaves.get(0).reader();
|
||||
Terms terms = ar.terms("foo");
|
||||
Terms some_other_field = ar.terms("some_other_field");
|
||||

||||
assertThat(terms.size(), equalTo(2l));
|
||||
assertThat(terms, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
|
||||
assertThat(some_other_field, not(instanceOf(BloomFilterPostingsFormat.BloomFilteredTerms.class)));
|
||||
TermsEnum iterator = terms.iterator(null);
|
||||
// tick off each term the enum returns; the set ending up empty means both "foo" and "bar" were seen
Set<String> expected = new HashSet<>();
|
||||
expected.add("foo");
|
||||
expected.add("bar");
|
||||
while(iterator.next() != null) {
|
||||
expected.remove(iterator.term().utf8ToString());
|
||||
}
|
||||
assertThat(expected.size(), equalTo(0));
|
||||
reader.close();
|
||||
writer.close();
|
||||
d.close();
|
||||
}
|
||||

||||
}
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.codec.postingformat;
|
||||
|
||||
import com.google.common.base.Predicates;
|
||||
import com.google.common.collect.Iterators;
|
||||
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat.BloomFilteredFieldsConsumer;
|
||||
import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
|
||||
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
 * Read-write version of {@link Elasticsearch090PostingsFormat}, with blooms, for testing.
 * <p>
 * It overrides {@link #fieldsConsumer(SegmentWriteState)} so that segments can be written
 * with this format; the write path goes through the inherited {@code bloomPostings} format.
 */
|
||||
public class Elasticsearch090RWPostingsFormat extends Elasticsearch090PostingsFormat {
|
||||
/**
 * Returns a consumer that writes the incoming fields in two passes: first every field not
 * matched by {@code UID_FIELD_FILTER} straight to the bloom consumer's delegate, then only
 * the {@code UidFieldMapper.NAME} field through the bloom consumer itself.
 *
 * @param state the segment write state, passed on to {@code bloomPostings.fieldsConsumer}
 * @return a consumer wrapping the bloom-filtered consumer created for {@code state}
 * @throws IOException if creating the underlying consumer fails
 */
@Override
|
||||
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
final BloomFilteredFieldsConsumer fieldsConsumer = bloomPostings.fieldsConsumer(state);
|
||||
return new FieldsConsumer() {
|
||||

||||
@Override
|
||||
public void write(Fields fields) throws IOException {
|
||||

||||
// view of the fields whose iterator skips everything matched by UID_FIELD_FILTER
// (NOTE(review): presumably that is just the uid field — the predicate body is not visible here)
Fields maskedFields = new FilterLeafReader.FilterFields(fields) {
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return Iterators.filter(this.in.iterator(), Predicates.not(UID_FIELD_FILTER));
|
||||
}
|
||||
};
|
||||
// these fields bypass the bloom layer and go directly to the wrapped consumer
fieldsConsumer.getDelegate().write(maskedFields);
|
||||
// second view: the iterator yields only the uid field name
maskedFields = new FilterLeafReader.FilterFields(fields) {
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return Iterators.singletonIterator(UidFieldMapper.NAME);
|
||||
}
|
||||
};
|
||||
// only go through bloom for the UID field
|
||||
fieldsConsumer.write(maskedFields);
|
||||
}
|
||||

||||
// closing this wrapper closes the bloom-filtered consumer it was built around
@Override
|
||||
public void close() throws IOException {
|
||||
fieldsConsumer.close();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -47,9 +47,7 @@ public class ElasticsearchPostingsFormatTest extends BasePostingsFormatTestCase
|
|||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
return random().nextBoolean() ?
|
||||
TestUtil.alwaysPostingsFormat(new Elasticsearch090PostingsFormat())
|
||||
: TestUtil.alwaysPostingsFormat(new BloomFilterPostingsFormat(PostingsFormat.forName("Lucene50"), BloomFilter.Factory.DEFAULT));
|
||||
return TestUtil.alwaysPostingsFormat(new Elasticsearch090RWPostingsFormat());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -19,19 +19,12 @@
|
|||
|
||||
package org.elasticsearch.index.engine.internal;
|
||||
|
||||
import com.google.common.base.Predicate;
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.elasticsearch.action.admin.indices.segments.IndexSegments;
|
||||
import org.elasticsearch.action.admin.indices.segments.IndexShardSegments;
|
||||
import org.elasticsearch.action.admin.indices.segments.IndicesSegmentResponse;
|
||||
import org.elasticsearch.action.admin.indices.segments.ShardSegments;
|
||||
import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
|
||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.common.util.BloomFilter;
|
||||
import org.elasticsearch.index.codec.CodecService;
|
||||
import org.elasticsearch.index.engine.Segment;
|
||||
import org.elasticsearch.index.merge.policy.AbstractMergePolicyProvider;
|
||||
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
||||
import org.hamcrest.Matchers;
|
||||
import org.junit.Test;
|
||||
|
@ -39,85 +32,9 @@ import org.junit.Test;
|
|||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||
|
||||
public class InternalEngineIntegrationTest extends ElasticsearchIntegrationTest {
|
||||
|
||||
@Test
|
||||
@Slow
|
||||
public void testSettingLoadBloomFilterDefaultTrue() throws Exception {
|
||||
client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1)).get();
|
||||
client().prepareIndex("test", "foo").setSource("field", "foo").get();
|
||||
ensureGreen();
|
||||
refresh();
|
||||
IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
|
||||
final long segmentsMemoryWithBloom = stats.getTotal().getSegments().getMemoryInBytes();
|
||||
logger.info("segments with bloom: {}", segmentsMemoryWithBloom);
|
||||
|
||||
logger.info("updating the setting to unload bloom filters");
|
||||
client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get();
|
||||
logger.info("waiting for memory to match without blooms");
|
||||
awaitBusy(new Predicate<Object>() {
|
||||
public boolean apply(Object o) {
|
||||
IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
|
||||
long segmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes();
|
||||
logger.info("trying segments without bloom: {}", segmentsMemoryWithoutBloom);
|
||||
return segmentsMemoryWithoutBloom == (segmentsMemoryWithBloom - BloomFilter.Factory.DEFAULT.createFilter(1).getSizeInBytes());
|
||||
}
|
||||
});
|
||||
|
||||
logger.info("updating the setting to load bloom filters");
|
||||
client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, true)).get();
|
||||
logger.info("waiting for memory to match with blooms");
|
||||
awaitBusy(new Predicate<Object>() {
|
||||
public boolean apply(Object o) {
|
||||
IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
|
||||
long newSegmentsMemoryWithBloom = stats.getTotal().getSegments().getMemoryInBytes();
|
||||
logger.info("trying segments with bloom: {}", newSegmentsMemoryWithBloom);
|
||||
return newSegmentsMemoryWithBloom == segmentsMemoryWithBloom;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
@Slow
|
||||
public void testSettingLoadBloomFilterDefaultFalse() throws Exception {
|
||||
client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1).put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get();
|
||||
client().prepareIndex("test", "foo").setSource("field", "foo").get();
|
||||
ensureGreen();
|
||||
refresh();
|
||||
|
||||
IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
|
||||
final long segmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes();
|
||||
logger.info("segments without bloom: {}", segmentsMemoryWithoutBloom);
|
||||
|
||||
logger.info("updating the setting to load bloom filters");
|
||||
client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, true)).get();
|
||||
logger.info("waiting for memory to match with blooms");
|
||||
awaitBusy(new Predicate<Object>() {
|
||||
public boolean apply(Object o) {
|
||||
IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
|
||||
long segmentsMemoryWithBloom = stats.getTotal().getSegments().getMemoryInBytes();
|
||||
logger.info("trying segments with bloom: {}", segmentsMemoryWithoutBloom);
|
||||
return segmentsMemoryWithoutBloom == (segmentsMemoryWithBloom - BloomFilter.Factory.DEFAULT.createFilter(1).getSizeInBytes());
|
||||
}
|
||||
});
|
||||
|
||||
logger.info("updating the setting to unload bloom filters");
|
||||
client().admin().indices().prepareUpdateSettings("test").setSettings(ImmutableSettings.builder().put(CodecService.INDEX_CODEC_BLOOM_LOAD, false)).get();
|
||||
logger.info("waiting for memory to match without blooms");
|
||||
awaitBusy(new Predicate<Object>() {
|
||||
public boolean apply(Object o) {
|
||||
IndicesStatsResponse stats = client().admin().indices().prepareStats().setSegments(true).get();
|
||||
long newSegmentsMemoryWithoutBloom = stats.getTotal().getSegments().getMemoryInBytes();
|
||||
logger.info("trying segments without bloom: {}", newSegmentsMemoryWithoutBloom);
|
||||
return newSegmentsMemoryWithoutBloom == segmentsMemoryWithoutBloom;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetIndexCompoundOnFlush() {
|
||||
client().admin().indices().prepareCreate("test").setSettings(ImmutableSettings.builder().put("number_of_replicas", 0).put("number_of_shards", 1)).get();
|
||||
|
|
|
@ -36,6 +36,7 @@ import org.apache.lucene.store.*;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
|
||||
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
|
||||
|
@ -70,7 +71,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
|
|||
|
||||
IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
|
||||
LookupFactory load = currentProvider.load(input);
|
||||
PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat());
|
||||
PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT));
|
||||
NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer());
|
||||
Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING), new CompletionSuggestionContext(null));
|
||||
List<LookupResult> result = lookup.lookup("ge", false, 10);
|
||||
|
@ -214,7 +215,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
|
|||
iter = primaryIter;
|
||||
}
|
||||
reference.build(iter);
|
||||
PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat());
|
||||
PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT));
|
||||
|
||||
NamedAnalyzer namedAnalzyer = new NamedAnalyzer("foo", new StandardAnalyzer());
|
||||
final CompletionFieldMapper mapper = new CompletionFieldMapper(new Names("foo"), namedAnalzyer, namedAnalzyer, provider, null, usePayloads,
|
||||
|
|
|
@ -466,9 +466,6 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
|
|||
builder.put(FsTranslog.INDEX_TRANSLOG_FS_TYPE, RandomPicks.randomFrom(random, FsTranslogFile.Type.values()).name());
|
||||
}
|
||||
|
||||
// Randomly load or don't load bloom filters:
|
||||
builder.put(CodecService.INDEX_CODEC_BLOOM_LOAD, random.nextBoolean());
|
||||
|
||||
if (random.nextBoolean()) {
|
||||
builder.put(IndicesQueryCache.INDEX_CACHE_QUERY_ENABLED, random.nextBoolean());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue