mirror of https://github.com/apache/lucene.git
New addition: Lucene-4069 BloomFilterPostingsFormat for faster access to low-frequency terms such as primary keys.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1368567 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
08209761d3
commit
ad2cefee00
|
@ -15,6 +15,9 @@ New features
|
||||||
underlying PayloadFunction's explanation as the explanation
|
underlying PayloadFunction's explanation as the explanation
|
||||||
for the payload score. (Scott Smerchek via Robert Muir)
|
for the payload score. (Scott Smerchek via Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-4069: Added BloomFilteringPostingsFormat for use with low-frequency terms
|
||||||
|
such as primary keys (Mark Harwood, Mike McCandless)
|
||||||
|
|
||||||
* LUCENE-4201: Added JapaneseIterationMarkCharFilter to normalize Japanese
|
* LUCENE-4201: Added JapaneseIterationMarkCharFilter to normalize Japanese
|
||||||
iteration marks. (Robert Muir, Christian Moen)
|
iteration marks. (Robert Muir, Christian Moen)
|
||||||
|
|
||||||
|
|
|
@ -53,7 +53,13 @@ public abstract class PostingsFormat implements NamedSPILoader.NamedSPI {
|
||||||
|
|
||||||
/** Reads a segment. NOTE: by the time this call
|
/** Reads a segment. NOTE: by the time this call
|
||||||
* returns, it must hold open any files it will need to
|
* returns, it must hold open any files it will need to
|
||||||
* use; else, those files may be deleted. */
|
* use; else, those files may be deleted.
|
||||||
|
* Additionally, required files may be deleted during the execution of
|
||||||
|
* this call before there is a chance to open them. Under these
|
||||||
|
* circumstances an IOException should be thrown by the implementation.
|
||||||
|
* IOExceptions are expected and will automatically cause a retry of the
|
||||||
|
* segment opening logic with the newly revised segments.
|
||||||
|
* */
|
||||||
public abstract FieldsProducer fieldsProducer(SegmentReadState state) throws IOException;
|
public abstract FieldsProducer fieldsProducer(SegmentReadState state) throws IOException;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
package org.apache.lucene.codecs.bloom;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.util.FuzzySet;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class used to create index-time {@link FuzzySet} appropriately configured for
|
||||||
|
* each field. Also called to right-size bitsets for serialization.
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public abstract class BloomFilterFactory {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param state The content to be indexed
|
||||||
|
* @param info
|
||||||
|
* the field requiring a BloomFilter
|
||||||
|
* @return An appropriately sized set or null if no BloomFiltering required
|
||||||
|
*/
|
||||||
|
public abstract FuzzySet getSetForField(SegmentWriteState state, FieldInfo info);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when downsizing bitsets for serialization
|
||||||
|
*
|
||||||
|
* @param fieldInfo
|
||||||
|
* The field with sparse set bits
|
||||||
|
* @param initialSet
|
||||||
|
* The bits accumulated
|
||||||
|
* @return null or a hopefully more densely packed, smaller bitset
|
||||||
|
*/
|
||||||
|
public FuzzySet downsize(FieldInfo fieldInfo, FuzzySet initialSet) {
|
||||||
|
// Aim for a bitset size that would have 10% of bits set (so 90% of searches
|
||||||
|
// would fail-fast)
|
||||||
|
float targetMaxSaturation = 0.1f;
|
||||||
|
return initialSet.downsize(targetMaxSaturation);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used to determine if the given filter has reached saturation and should be retired i.e. not saved any more
|
||||||
|
* @param bloomFilter The bloomFilter being tested
|
||||||
|
* @param fieldInfo The field with which this filter is associated
|
||||||
|
* @return true if the set has reached saturation and should be retired
|
||||||
|
*/
|
||||||
|
public abstract boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo);
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,514 @@
|
||||||
|
package org.apache.lucene.codecs.bloom;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.PostingsConsumer;
|
||||||
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
|
import org.apache.lucene.codecs.TermStats;
|
||||||
|
import org.apache.lucene.codecs.TermsConsumer;
|
||||||
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldsEnum;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.FuzzySet;
|
||||||
|
import org.apache.lucene.util.FuzzySet.ContainsResult;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||||
|
import org.apache.lucene.util.hash.MurmurHash2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>
|
||||||
|
* A {@link PostingsFormat} useful for low doc-frequency fields such as primary
|
||||||
|
* keys. Bloom filters are maintained in a ".blm" file which offers "fast-fail"
|
||||||
|
* for reads in segments known to have no record of the key. A choice of
|
||||||
|
* delegate PostingsFormat is used to record all other Postings data.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* A choice of {@link BloomFilterFactory} can be passed to tailor Bloom Filter
|
||||||
|
* settings on a per-field basis. The default configuration is
|
||||||
|
* {@link DefaultBloomFilterFactory} which allocates a ~8mb bitset and hashes
|
||||||
|
* values using {@link MurmurHash2}. This should be suitable for most purposes.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* The format of the blm file is as follows:
|
||||||
|
* </p>
|
||||||
|
* <ul>
|
||||||
|
* <li>BloomFilter (.blm) --> Header, DelegatePostingsFormatName,
|
||||||
|
* NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
|
||||||
|
* <li>Filter --> FieldNumber, FuzzySet</li>
|
||||||
|
* <li>FuzzySet -->See {@link FuzzySet#serialize(DataOutput)}</li>
|
||||||
|
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
|
||||||
|
* <li>DelegatePostingsFormatName --> {@link DataOutput#writeString(String)
|
||||||
|
* String} The name of a ServiceProvider registered {@link PostingsFormat}</li>
|
||||||
|
* <li>NumFilteredFields --> {@link DataOutput#writeInt Uint32}</li>
|
||||||
|
* <li>FieldNumber --> {@link DataOutput#writeInt Uint32} The number of the
|
||||||
|
* field in this segment</li>
|
||||||
|
* </ul>
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class BloomFilteringPostingsFormat extends PostingsFormat {
|
||||||
|
|
||||||
|
public static final String BLOOM_CODEC_NAME = "BloomFilter";
|
||||||
|
public static final int BLOOM_CODEC_VERSION = 1;
|
||||||
|
|
||||||
|
/** Extension of Bloom Filters file */
|
||||||
|
static final String BLOOM_EXTENSION = "blm";
|
||||||
|
|
||||||
|
BloomFilterFactory bloomFilterFactory = new DefaultBloomFilterFactory();
|
||||||
|
private PostingsFormat delegatePostingsFormat;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates Bloom filters for a selection of fields created in the index. This
|
||||||
|
* is recorded as a set of Bitsets held as a segment summary in an additional
|
||||||
|
* "blm" file. This PostingsFormat delegates to a choice of delegate
|
||||||
|
* PostingsFormat for encoding all other postings data.
|
||||||
|
*
|
||||||
|
* @param delegatePostingsFormat
|
||||||
|
* The PostingsFormat that records all the non-bloom filter data i.e.
|
||||||
|
* postings info.
|
||||||
|
* @param bloomFilterFactory
|
||||||
|
* The {@link BloomFilterFactory} responsible for sizing BloomFilters
|
||||||
|
* appropriately
|
||||||
|
*/
|
||||||
|
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat,
|
||||||
|
BloomFilterFactory bloomFilterFactory) {
|
||||||
|
super(BLOOM_CODEC_NAME);
|
||||||
|
this.delegatePostingsFormat = delegatePostingsFormat;
|
||||||
|
this.bloomFilterFactory = bloomFilterFactory;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates Bloom filters for a selection of fields created in the index. This
|
||||||
|
* is recorded as a set of Bitsets held as a segment summary in an additional
|
||||||
|
* "blm" file. This PostingsFormat delegates to a choice of delegate
|
||||||
|
* PostingsFormat for encoding all other postings data. This choice of
|
||||||
|
* constructor defaults to the {@link DefaultBloomFilterFactory} for
|
||||||
|
* configuring per-field BloomFilters.
|
||||||
|
*
|
||||||
|
* @param delegatePostingsFormat
|
||||||
|
* The PostingsFormat that records all the non-bloom filter data i.e.
|
||||||
|
* postings info.
|
||||||
|
*/
|
||||||
|
public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat) {
|
||||||
|
this(delegatePostingsFormat, new DefaultBloomFilterFactory());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used only by core Lucene at read-time via Service Provider instantiation -
|
||||||
|
// do not use at Write-time in application code.
|
||||||
|
public BloomFilteringPostingsFormat() {
|
||||||
|
super(BLOOM_CODEC_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
|
||||||
|
throws IOException {
|
||||||
|
if (delegatePostingsFormat == null) {
|
||||||
|
throw new UnsupportedOperationException("Error - " + getClass().getName()
|
||||||
|
+ " has been constructed without a choice of PostingsFormat");
|
||||||
|
}
|
||||||
|
return new BloomFilteredFieldsConsumer(
|
||||||
|
delegatePostingsFormat.fieldsConsumer(state), state,
|
||||||
|
delegatePostingsFormat);
|
||||||
|
}
|
||||||
|
|
||||||
|
public FieldsProducer fieldsProducer(SegmentReadState state)
|
||||||
|
throws IOException {
|
||||||
|
return new BloomFilteredFieldsProducer(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
public class BloomFilteredFieldsProducer extends FieldsProducer {
|
||||||
|
private FieldsProducer delegateFieldsProducer;
|
||||||
|
HashMap<String,FuzzySet> bloomsByFieldName = new HashMap<String,FuzzySet>();
|
||||||
|
|
||||||
|
public BloomFilteredFieldsProducer(SegmentReadState state)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
String bloomFileName = IndexFileNames.segmentFileName(
|
||||||
|
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
|
||||||
|
IndexInput bloomIn = null;
|
||||||
|
try {
|
||||||
|
bloomIn = state.dir.openInput(bloomFileName, state.context);
|
||||||
|
CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
|
||||||
|
BLOOM_CODEC_VERSION);
|
||||||
|
// // Load the hash function used in the BloomFilter
|
||||||
|
// hashFunction = HashFunction.forName(bloomIn.readString());
|
||||||
|
// Load the delegate postings format
|
||||||
|
PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn
|
||||||
|
.readString());
|
||||||
|
|
||||||
|
this.delegateFieldsProducer = delegatePostingsFormat
|
||||||
|
.fieldsProducer(state);
|
||||||
|
int numBlooms = bloomIn.readInt();
|
||||||
|
for (int i = 0; i < numBlooms; i++) {
|
||||||
|
int fieldNum = bloomIn.readInt();
|
||||||
|
FuzzySet bloom = FuzzySet.deserialize(bloomIn);
|
||||||
|
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
|
||||||
|
bloomsByFieldName.put(fieldInfo.name, bloom);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
IOUtils.close(bloomIn);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public FieldsEnum iterator() throws IOException {
|
||||||
|
return new BloomFilteredFieldsEnum(delegateFieldsProducer.iterator(),
|
||||||
|
bloomsByFieldName);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() throws IOException {
|
||||||
|
delegateFieldsProducer.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Terms terms(String field) throws IOException {
|
||||||
|
FuzzySet filter = bloomsByFieldName.get(field);
|
||||||
|
if (filter == null) {
|
||||||
|
return delegateFieldsProducer.terms(field);
|
||||||
|
} else {
|
||||||
|
Terms result = delegateFieldsProducer.terms(field);
|
||||||
|
if (result == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return new BloomFilteredTerms(result, filter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() throws IOException {
|
||||||
|
return delegateFieldsProducer.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getUniqueTermCount() throws IOException {
|
||||||
|
return delegateFieldsProducer.getUniqueTermCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not all fields in a segment may be subject to a bloom filter. This class
|
||||||
|
// wraps Terms objects appropriately if a filtering request is present
|
||||||
|
class BloomFilteredFieldsEnum extends FieldsEnum {
|
||||||
|
private FieldsEnum delegateFieldsEnum;
|
||||||
|
private HashMap<String,FuzzySet> bloomsByFieldName;
|
||||||
|
private String currentFieldName;
|
||||||
|
|
||||||
|
public BloomFilteredFieldsEnum(FieldsEnum iterator,
|
||||||
|
HashMap<String,FuzzySet> bloomsByFieldName) {
|
||||||
|
this.delegateFieldsEnum = iterator;
|
||||||
|
this.bloomsByFieldName = bloomsByFieldName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AttributeSource attributes() {
|
||||||
|
return delegateFieldsEnum.attributes();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String next() throws IOException {
|
||||||
|
currentFieldName = delegateFieldsEnum.next();
|
||||||
|
return currentFieldName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Terms terms() throws IOException {
|
||||||
|
FuzzySet filter = bloomsByFieldName.get(currentFieldName);
|
||||||
|
if (filter == null) {
|
||||||
|
return delegateFieldsEnum.terms();
|
||||||
|
} else {
|
||||||
|
Terms result = delegateFieldsEnum.terms();
|
||||||
|
if (result == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// wrap the terms object with a bloom filter
|
||||||
|
return new BloomFilteredTerms(result, filter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class BloomFilteredTerms extends Terms {
|
||||||
|
private Terms delegateTerms;
|
||||||
|
private FuzzySet filter;
|
||||||
|
|
||||||
|
public BloomFilteredTerms(Terms terms, FuzzySet filter) {
|
||||||
|
this.delegateTerms = terms;
|
||||||
|
this.filter = filter;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum intersect(CompiledAutomaton compiled,
|
||||||
|
final BytesRef startTerm) throws IOException {
|
||||||
|
return delegateTerms.intersect(compiled, startTerm);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
||||||
|
TermsEnum result;
|
||||||
|
if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
|
||||||
|
// recycle the existing BloomFilteredTermsEnum by asking the delegate
|
||||||
|
// to recycle its contained TermsEnum
|
||||||
|
BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
|
||||||
|
if (bfte.filter == filter) {
|
||||||
|
bfte.delegateTermsEnum = delegateTerms
|
||||||
|
.iterator(bfte.delegateTermsEnum);
|
||||||
|
return bfte;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// We have been handed something we cannot reuse (either null, wrong
|
||||||
|
// class or wrong filter) so allocate a new object
|
||||||
|
result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse),
|
||||||
|
filter);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Comparator<BytesRef> getComparator() throws IOException {
|
||||||
|
return delegateTerms.getComparator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long size() throws IOException {
|
||||||
|
return delegateTerms.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumTotalTermFreq() throws IOException {
|
||||||
|
return delegateTerms.getSumTotalTermFreq();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
return delegateTerms.getSumDocFreq();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getDocCount() throws IOException {
|
||||||
|
return delegateTerms.getDocCount();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class BloomFilteredTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
|
TermsEnum delegateTermsEnum;
|
||||||
|
private FuzzySet filter;
|
||||||
|
|
||||||
|
public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) {
|
||||||
|
this.delegateTermsEnum = iterator;
|
||||||
|
this.filter = filter;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final BytesRef next() throws IOException {
|
||||||
|
return delegateTermsEnum.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final Comparator<BytesRef> getComparator() {
|
||||||
|
return delegateTermsEnum.getComparator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final boolean seekExact(BytesRef text, boolean useCache)
|
||||||
|
throws IOException {
|
||||||
|
// The magical fail-fast speed up that is the entire point of all of
|
||||||
|
// this code - save a disk seek if there is a match on an in-memory
|
||||||
|
// structure
|
||||||
|
// that may occasionally give a false positive but guaranteed no false
|
||||||
|
// negatives
|
||||||
|
if (filter.contains(text) == ContainsResult.NO) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return delegateTermsEnum.seekExact(text, useCache);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final SeekStatus seekCeil(BytesRef text, boolean useCache)
|
||||||
|
throws IOException {
|
||||||
|
return delegateTermsEnum.seekCeil(text, useCache);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final void seekExact(long ord) throws IOException {
|
||||||
|
delegateTermsEnum.seekExact(ord);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final BytesRef term() throws IOException {
|
||||||
|
return delegateTermsEnum.term();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final long ord() throws IOException {
|
||||||
|
return delegateTermsEnum.ord();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final int docFreq() throws IOException {
|
||||||
|
return delegateTermsEnum.docFreq();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final long totalTermFreq() throws IOException {
|
||||||
|
return delegateTermsEnum.totalTermFreq();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
|
||||||
|
DocsAndPositionsEnum reuse, int flags) throws IOException {
|
||||||
|
return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
|
||||||
|
throws IOException {
|
||||||
|
return delegateTermsEnum.docs(liveDocs, reuse, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class BloomFilteredFieldsConsumer extends FieldsConsumer {
|
||||||
|
private FieldsConsumer delegateFieldsConsumer;
|
||||||
|
private Map<FieldInfo,FuzzySet> bloomFilters = new HashMap<FieldInfo,FuzzySet>();
|
||||||
|
private SegmentWriteState state;
|
||||||
|
|
||||||
|
// private PostingsFormat delegatePostingsFormat;
|
||||||
|
|
||||||
|
public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
|
||||||
|
SegmentWriteState state, PostingsFormat delegatePostingsFormat) {
|
||||||
|
this.delegateFieldsConsumer = fieldsConsumer;
|
||||||
|
// this.delegatePostingsFormat=delegatePostingsFormat;
|
||||||
|
this.state = state;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsConsumer addField(FieldInfo field) throws IOException {
|
||||||
|
FuzzySet bloomFilter = bloomFilterFactory.getSetForField(state,field);
|
||||||
|
if (bloomFilter != null) {
|
||||||
|
assert bloomFilters.containsKey(field) == false;
|
||||||
|
bloomFilters.put(field, bloomFilter);
|
||||||
|
return new WrappedTermsConsumer(delegateFieldsConsumer.addField(field),bloomFilter);
|
||||||
|
} else {
|
||||||
|
// No, use the unfiltered fieldsConsumer - we are not interested in
|
||||||
|
// recording any term Bitsets.
|
||||||
|
return delegateFieldsConsumer.addField(field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
delegateFieldsConsumer.close();
|
||||||
|
// Now we are done accumulating values for these fields
|
||||||
|
List<Entry<FieldInfo,FuzzySet>> nonSaturatedBlooms = new ArrayList<Map.Entry<FieldInfo,FuzzySet>>();
|
||||||
|
|
||||||
|
for (Entry<FieldInfo,FuzzySet> entry : bloomFilters.entrySet()) {
|
||||||
|
FuzzySet bloomFilter = entry.getValue();
|
||||||
|
if(!bloomFilterFactory.isSaturated(bloomFilter,entry.getKey())){
|
||||||
|
nonSaturatedBlooms.add(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
String bloomFileName = IndexFileNames.segmentFileName(
|
||||||
|
state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
|
||||||
|
IndexOutput bloomOutput = null;
|
||||||
|
try {
|
||||||
|
bloomOutput = state.directory
|
||||||
|
.createOutput(bloomFileName, state.context);
|
||||||
|
CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
|
||||||
|
BLOOM_CODEC_VERSION);
|
||||||
|
// remember the name of the postings format we will delegate to
|
||||||
|
bloomOutput.writeString(delegatePostingsFormat.getName());
|
||||||
|
|
||||||
|
// First field in the output file is the number of fields+blooms saved
|
||||||
|
bloomOutput.writeInt(nonSaturatedBlooms.size());
|
||||||
|
for (Entry<FieldInfo,FuzzySet> entry : nonSaturatedBlooms) {
|
||||||
|
FieldInfo fieldInfo = entry.getKey();
|
||||||
|
FuzzySet bloomFilter = entry.getValue();
|
||||||
|
bloomOutput.writeInt(fieldInfo.number);
|
||||||
|
saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
IOUtils.close(bloomOutput);
|
||||||
|
}
|
||||||
|
//We are done with large bitsets so no need to keep them hanging around
|
||||||
|
bloomFilters.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveAppropriatelySizedBloomFilter(IndexOutput bloomOutput,
|
||||||
|
FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {
|
||||||
|
|
||||||
|
FuzzySet rightSizedSet = bloomFilterFactory.downsize(fieldInfo,
|
||||||
|
bloomFilter);
|
||||||
|
if (rightSizedSet == null) {
|
||||||
|
rightSizedSet = bloomFilter;
|
||||||
|
}
|
||||||
|
rightSizedSet.serialize(bloomOutput);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class WrappedTermsConsumer extends TermsConsumer {
|
||||||
|
private TermsConsumer delegateTermsConsumer;
|
||||||
|
private FuzzySet bloomFilter;
|
||||||
|
|
||||||
|
public WrappedTermsConsumer(TermsConsumer termsConsumer,FuzzySet bloomFilter) {
|
||||||
|
this.delegateTermsConsumer = termsConsumer;
|
||||||
|
this.bloomFilter = bloomFilter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PostingsConsumer startTerm(BytesRef text) throws IOException {
|
||||||
|
return delegateTermsConsumer.startTerm(text);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
|
||||||
|
|
||||||
|
// Record this term in our BloomFilter
|
||||||
|
if (stats.docFreq > 0) {
|
||||||
|
bloomFilter.addValue(text);
|
||||||
|
}
|
||||||
|
delegateTermsConsumer.finishTerm(text, stats);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
|
||||||
|
throws IOException {
|
||||||
|
delegateTermsConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Comparator<BytesRef> getComparator() throws IOException {
|
||||||
|
return delegateTermsConsumer.getComparator();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
package org.apache.lucene.codecs.bloom;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.util.FuzzySet;
|
||||||
|
import org.apache.lucene.util.hash.HashFunction;
|
||||||
|
import org.apache.lucene.util.hash.MurmurHash2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default policy is to allocate a bitset with 10% saturation given a unique term per document.
|
||||||
|
* Bits are set via MurmurHash2 hashing function.
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class DefaultBloomFilterFactory extends BloomFilterFactory {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FuzzySet getSetForField(SegmentWriteState state,FieldInfo info) {
|
||||||
|
//Assume all of the docs have a unique term (e.g. a primary key) and we hope to maintain a set with 10% of bits set
|
||||||
|
return FuzzySet.createSetBasedOnQuality(state.segmentInfo.getDocCount(), 0.10f, new MurmurHash2());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo) {
|
||||||
|
// Don't bother saving bitsets if >90% of bits are set - we don't want to
|
||||||
|
// throw any more memory at this problem.
|
||||||
|
return bloomFilter.getSaturation() > 0.9f;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Codec PostingsFormat for fast access to low-frequency terms such as primary key fields.
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -54,6 +54,11 @@ public final class FixedBitSet extends DocIdSet implements Bits {
|
||||||
bits = new long[bits2words(numBits)];
|
bits = new long[bits2words(numBits)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public FixedBitSet(long[]storedBits,int numBits) {
|
||||||
|
this.numBits = numBits;
|
||||||
|
this.bits = storedBits;
|
||||||
|
}
|
||||||
|
|
||||||
/** Makes full copy. */
|
/** Makes full copy. */
|
||||||
public FixedBitSet(FixedBitSet other) {
|
public FixedBitSet(FixedBitSet other) {
|
||||||
bits = new long[other.bits.length];
|
bits = new long[other.bits.length];
|
||||||
|
|
|
@ -0,0 +1,292 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
import org.apache.lucene.util.hash.HashFunction;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>
|
||||||
|
* A class used to represent a set of many, potentially large, values (e.g. many
|
||||||
|
* long strings such as URLs), using a significantly smaller amount of memory.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* The set is "lossy" in that it cannot definitively state that is does contain
|
||||||
|
* a value but it <em>can</em> definitively say if a value is <em>not</em> in
|
||||||
|
* the set. It can therefore be used as a Bloom Filter.
|
||||||
|
* </p>
|
||||||
|
* Another application of the set is that it can be used to perform fuzzy counting because
|
||||||
|
* it can estimate reasonably accurately how many unique values are contained in the set.
|
||||||
|
* </p>
|
||||||
|
* <p>This class is NOT threadsafe.</p>
|
||||||
|
* <p>
|
||||||
|
* Internally a Bitset is used to record values and once a client has finished recording
|
||||||
|
* a stream of values the {@link #downsize(float)} method can be used to create a suitably smaller set that
|
||||||
|
* is sized appropriately for the number of values recorded and desired saturation levels.
|
||||||
|
*
|
||||||
|
* </p>
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class FuzzySet {
|
||||||
|
|
||||||
|
public static final int FUZZY_SERIALIZATION_VERSION=1;
|
||||||
|
|
||||||
|
public enum ContainsResult {
|
||||||
|
MAYBE, NO
|
||||||
|
};
|
||||||
|
private HashFunction hashFunction;
|
||||||
|
private FixedBitSet filter;
|
||||||
|
private int bloomSize;
|
||||||
|
|
||||||
|
//The sizes of BitSet used are all numbers that, when expressed in binary form,
|
||||||
|
//are all ones. This is to enable fast downsizing from one bitset to another
|
||||||
|
//by simply ANDing each set index in one bitset with the size of the target bitset
|
||||||
|
// - this provides a fast modulo of the number. Values previously accumulated in
|
||||||
|
// a large bitset and then mapped to a smaller set can be looked up using a single
|
||||||
|
// AND operation of the query term's hash rather than needing to perform a 2-step
|
||||||
|
// translation of the query term that mirrors the stored content's reprojections.
|
||||||
|
static final int usableBitSetSizes[];
|
||||||
|
static
|
||||||
|
{
|
||||||
|
usableBitSetSizes=new int[30];
|
||||||
|
int mask=1;
|
||||||
|
int size=mask;
|
||||||
|
for (int i = 0; i < usableBitSetSizes.length; i++) {
|
||||||
|
size=(size<<1)|mask;
|
||||||
|
usableBitSetSizes[i]=size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rounds down required maxNumberOfBits to the nearest number that is made up
|
||||||
|
* of all ones as a binary number.
|
||||||
|
* Use this method where controlling memory use is paramount.
|
||||||
|
*/
|
||||||
|
public static int getNearestSetSize(int maxNumberOfBits)
|
||||||
|
{
|
||||||
|
int result=usableBitSetSizes[0];
|
||||||
|
for (int i = 0; i < usableBitSetSizes.length; i++) {
|
||||||
|
if(usableBitSetSizes[i]<=maxNumberOfBits)
|
||||||
|
{
|
||||||
|
result=usableBitSetSizes[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use this method to choose a set size where accuracy (low content saturation) is more important
|
||||||
|
* than deciding how much memory to throw at the problem.
|
||||||
|
* @param maxNumberOfValuesExpected
|
||||||
|
* @param desiredSaturation A number between 0 and 1 expressing the % of bits set once all values have been recorded
|
||||||
|
* @return The size of the set nearest to the required size
|
||||||
|
*/
|
||||||
|
public static int getNearestSetSize(int maxNumberOfValuesExpected,
|
||||||
|
float desiredSaturation) {
|
||||||
|
// Iterate around the various scales of bitset from smallest to largest looking for the first that
|
||||||
|
// satisfies value volumes at the chosen saturation level
|
||||||
|
for (int i = 0; i < usableBitSetSizes.length; i++) {
|
||||||
|
int numSetBitsAtDesiredSaturation = (int) (usableBitSetSizes[i] * desiredSaturation);
|
||||||
|
int estimatedNumUniqueValues = getEstimatedNumberUniqueValuesAllowingForCollisions(
|
||||||
|
usableBitSetSizes[i], numSetBitsAtDesiredSaturation);
|
||||||
|
if (estimatedNumUniqueValues > maxNumberOfValuesExpected) {
|
||||||
|
return usableBitSetSizes[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static FuzzySet createSetBasedOnMaxMemory(int maxNumBytes, HashFunction hashFunction)
|
||||||
|
{
|
||||||
|
int setSize=getNearestSetSize(maxNumBytes);
|
||||||
|
return new FuzzySet(new FixedBitSet(setSize+1),setSize,hashFunction);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static FuzzySet createSetBasedOnQuality(int maxNumUniqueValues, float desiredMaxSaturation, HashFunction hashFunction)
|
||||||
|
{
|
||||||
|
int setSize=getNearestSetSize(maxNumUniqueValues,desiredMaxSaturation);
|
||||||
|
return new FuzzySet(new FixedBitSet(setSize+1),setSize,hashFunction);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private FuzzySet(FixedBitSet filter, int bloomSize, HashFunction hashFunction) {
|
||||||
|
super();
|
||||||
|
this.filter = filter;
|
||||||
|
this.bloomSize = bloomSize;
|
||||||
|
this.hashFunction=hashFunction;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The main method required for a Bloom filter which, given a value determines set membership.
|
||||||
|
* Unlike a conventional set, the fuzzy set returns NO or MAYBE rather than true or false.
|
||||||
|
* @param value
|
||||||
|
* @return NO or MAYBE
|
||||||
|
*/
|
||||||
|
public ContainsResult contains(BytesRef value) {
|
||||||
|
int hash = hashFunction.hash(value);
|
||||||
|
if (hash < 0) {
|
||||||
|
hash = hash * -1;
|
||||||
|
}
|
||||||
|
return mayContainValue(hash);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serializes the data set to file using the following format:
|
||||||
|
* <ul>
|
||||||
|
* <li>FuzzySet -->FuzzySetVersion,HashFunctionName,BloomSize,
|
||||||
|
* NumBitSetWords,BitSetWord<sup>NumBitSetWords</sup></li>
|
||||||
|
* <li>HashFunctionName --> {@link DataOutput#writeString(String) String} The
|
||||||
|
* name of a ServiceProvider registered {@link HashFunction}</li>
|
||||||
|
* <li>FuzzySetVersion --> {@link DataOutput#writeInt Uint32} The version number of the {@link FuzzySet} class</li>
|
||||||
|
* <li>BloomSize --> {@link DataOutput#writeInt Uint32} The modulo value used
|
||||||
|
* to project hashes into the field's Bitset</li>
|
||||||
|
* <li>NumBitSetWords --> {@link DataOutput#writeInt Uint32} The number of
|
||||||
|
* longs (as returned from {@link FixedBitSet#getBits})</li>
|
||||||
|
* <li>BitSetWord --> {@link DataOutput#writeLong Long} A long from the array
|
||||||
|
* returned by {@link FixedBitSet#getBits}</li>
|
||||||
|
* </ul>
|
||||||
|
* @param out Data output stream
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void serialize(DataOutput out) throws IOException
|
||||||
|
{
|
||||||
|
out.writeInt(FUZZY_SERIALIZATION_VERSION);
|
||||||
|
out.writeString(hashFunction.getName());
|
||||||
|
out.writeInt(bloomSize);
|
||||||
|
long[] bits = filter.getBits();
|
||||||
|
out.writeInt(bits.length);
|
||||||
|
for (int i = 0; i < bits.length; i++) {
|
||||||
|
// Can't used VLong encoding because cant cope with negative numbers
|
||||||
|
// output by FixedBitSet
|
||||||
|
out.writeLong(bits[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public static FuzzySet deserialize(DataInput in) throws IOException
|
||||||
|
{
|
||||||
|
int version=in.readInt();
|
||||||
|
if(version!=FUZZY_SERIALIZATION_VERSION)
|
||||||
|
{
|
||||||
|
throw new IOException("Error deserializing: set version is not "+FUZZY_SERIALIZATION_VERSION);
|
||||||
|
}
|
||||||
|
HashFunction hashFunction=HashFunction.forName(in.readString());
|
||||||
|
int bloomSize=in.readInt();
|
||||||
|
int numLongs=in.readInt();
|
||||||
|
long[]longs=new long[numLongs];
|
||||||
|
for (int i = 0; i < numLongs; i++) {
|
||||||
|
longs[i]=in.readLong();
|
||||||
|
}
|
||||||
|
FixedBitSet bits = new FixedBitSet(longs,bloomSize+1);
|
||||||
|
return new FuzzySet(bits,bloomSize,hashFunction);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ContainsResult mayContainValue(int positiveHash) {
|
||||||
|
assert positiveHash >= 0;
|
||||||
|
// Bloom sizes are always base 2 and so can be ANDed for a fast modulo
|
||||||
|
int pos = positiveHash & bloomSize;
|
||||||
|
if (filter.get(pos)) {
|
||||||
|
// This term may be recorded in this index (but could be a collision)
|
||||||
|
return ContainsResult.MAYBE;
|
||||||
|
}
|
||||||
|
// definitely NOT in this segment
|
||||||
|
return ContainsResult.NO;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Records a value in the set. The referenced bytes are hashed and then modulo n'd where n is the
|
||||||
|
* chosen size of the internal bitset.
|
||||||
|
* @param value the key value to be hashed
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void addValue(BytesRef value) throws IOException {
|
||||||
|
int hash = hashFunction.hash(value);
|
||||||
|
if (hash < 0) {
|
||||||
|
hash = hash * -1;
|
||||||
|
}
|
||||||
|
// Bitmasking using bloomSize is effectively a modulo operation.
|
||||||
|
int bloomPos = hash & bloomSize;
|
||||||
|
filter.set(bloomPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param targetMaxSaturation A number between 0 and 1 describing the % of bits that would ideally be set in the
|
||||||
|
* result. Lower values have better qccuracy but require more space.
|
||||||
|
* @return a smaller FuzzySet or null if the current set is already over-saturated
|
||||||
|
*/
|
||||||
|
public FuzzySet downsize(float targetMaxSaturation)
|
||||||
|
{
|
||||||
|
int numBitsSet = filter.cardinality();
|
||||||
|
FixedBitSet rightSizedBitSet = filter;
|
||||||
|
int rightSizedBitSetSize = bloomSize;
|
||||||
|
//Hopefully find a smaller size bitset into which we can project accumulated values while maintaining desired saturation level
|
||||||
|
for (int i = 0; i < usableBitSetSizes.length; i++) {
|
||||||
|
int candidateBitsetSize = usableBitSetSizes[i];
|
||||||
|
float candidateSaturation = (float) numBitsSet
|
||||||
|
/ (float) candidateBitsetSize;
|
||||||
|
if (candidateSaturation <= targetMaxSaturation) {
|
||||||
|
rightSizedBitSetSize = candidateBitsetSize;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Re-project the numbers to a smaller space if necessary
|
||||||
|
if (rightSizedBitSetSize < bloomSize) {
|
||||||
|
// Reset the choice of bitset to the smaller version
|
||||||
|
rightSizedBitSet = new FixedBitSet(rightSizedBitSetSize + 1);
|
||||||
|
// Map across the bits from the large set to the smaller one
|
||||||
|
int bitIndex = 0;
|
||||||
|
do {
|
||||||
|
bitIndex = filter.nextSetBit(bitIndex);
|
||||||
|
if (bitIndex >= 0) {
|
||||||
|
// Project the larger number into a smaller one effectively
|
||||||
|
// modulo-ing by using the target bitset size as a mask
|
||||||
|
int downSizedBitIndex = bitIndex & rightSizedBitSetSize;
|
||||||
|
rightSizedBitSet.set(downSizedBitIndex);
|
||||||
|
bitIndex++;
|
||||||
|
}
|
||||||
|
} while ( (bitIndex >= 0)&&(bitIndex<=bloomSize));
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return new FuzzySet(rightSizedBitSet,rightSizedBitSetSize, hashFunction);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getEstimatedUniqueValues()
|
||||||
|
{
|
||||||
|
return getEstimatedNumberUniqueValuesAllowingForCollisions(bloomSize, filter.cardinality());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Given a set size and a the number of set bits, produces an estimate of the number of unique values recorded
|
||||||
|
public static int getEstimatedNumberUniqueValuesAllowingForCollisions(
|
||||||
|
int setSize, int numRecordedBits) {
|
||||||
|
double setSizeAsDouble = setSize;
|
||||||
|
double numRecordedBitsAsDouble = numRecordedBits;
|
||||||
|
double saturation = numRecordedBitsAsDouble / setSizeAsDouble;
|
||||||
|
double logInverseSaturation = Math.log(1 - saturation) * -1;
|
||||||
|
return (int) (setSizeAsDouble * logInverseSaturation);
|
||||||
|
}
|
||||||
|
|
||||||
|
public float getSaturation() {
|
||||||
|
int numBitsSet = filter.cardinality();
|
||||||
|
return (float) numBitsSet / (float) bloomSize;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,70 @@
|
||||||
|
package org.apache.lucene.util.hash;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.NamedSPILoader;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base class for hashing functions that can be referred to by name.
|
||||||
|
* Subclasses are expected to provide threadsafe implementations of the hash function
|
||||||
|
* on the range of bytes referenced in the provided {@link BytesRef}
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public abstract class HashFunction implements NamedSPILoader.NamedSPI {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hashes the contents of the referenced bytes
|
||||||
|
* @param bytes the data to be hashed
|
||||||
|
* @return the hash of the bytes referenced by bytes.offset and length bytes.length
|
||||||
|
*/
|
||||||
|
public abstract int hash(BytesRef bytes);
|
||||||
|
|
||||||
|
private static final NamedSPILoader<HashFunction> loader =
|
||||||
|
new NamedSPILoader<HashFunction>(HashFunction.class);
|
||||||
|
|
||||||
|
private final String name;
|
||||||
|
|
||||||
|
public HashFunction(String name) {
|
||||||
|
NamedSPILoader.checkServiceName(name);
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns this codec's name */
|
||||||
|
@Override
|
||||||
|
public final String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** looks up a hash function by name */
|
||||||
|
public static HashFunction forName(String name) {
|
||||||
|
return loader.lookup(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** returns a list of all available hash function names */
|
||||||
|
public static Set<String> availableHashFunctionNames() {
|
||||||
|
return loader.availableServices();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,105 @@
|
||||||
|
package org.apache.lucene.util.hash;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a very fast, non-cryptographic hash suitable for general hash-based
|
||||||
|
* lookup. See http://murmurhash.googlepages.com/ for more details.
|
||||||
|
* <p>
|
||||||
|
* The C version of MurmurHash 2.0 found at that site was ported to Java by
|
||||||
|
* Andrzej Bialecki (ab at getopt org).
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* The code from getopt.org was adapted by Mark Harwood in the form here as one of a pluggable choice of
|
||||||
|
* hashing functions as the core function had to be adapted to work with BytesRefs with offsets and lengths
|
||||||
|
* rather than raw byte arrays.
|
||||||
|
* </p>
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class MurmurHash2 extends HashFunction{
|
||||||
|
|
||||||
|
|
||||||
|
public static final String HASH_NAME="MurmurHash2";
|
||||||
|
|
||||||
|
public MurmurHash2() {
|
||||||
|
super(HASH_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int hash(byte[] data, int seed, int offset, int len) {
|
||||||
|
int m = 0x5bd1e995;
|
||||||
|
int r = 24;
|
||||||
|
int h = seed ^ len;
|
||||||
|
int len_4 = len >> 2;
|
||||||
|
for (int i = 0; i < len_4; i++) {
|
||||||
|
int i_4 = offset + (i << 2);
|
||||||
|
int k = data[i_4 + 3];
|
||||||
|
k = k << 8;
|
||||||
|
k = k | (data[i_4 + 2] & 0xff);
|
||||||
|
k = k << 8;
|
||||||
|
k = k | (data[i_4 + 1] & 0xff);
|
||||||
|
k = k << 8;
|
||||||
|
k = k | (data[i_4 + 0] & 0xff);
|
||||||
|
k *= m;
|
||||||
|
k ^= k >>> r;
|
||||||
|
k *= m;
|
||||||
|
h *= m;
|
||||||
|
h ^= k;
|
||||||
|
}
|
||||||
|
int len_m = len_4 << 2;
|
||||||
|
int left = len - len_m;
|
||||||
|
if (left != 0) {
|
||||||
|
if (left >= 3) {
|
||||||
|
h ^= data[offset + len - 3] << 16;
|
||||||
|
}
|
||||||
|
if (left >= 2) {
|
||||||
|
h ^= data[offset + len - 2] << 8;
|
||||||
|
}
|
||||||
|
if (left >= 1) {
|
||||||
|
h ^= data[offset + len - 1];
|
||||||
|
}
|
||||||
|
h *= m;
|
||||||
|
}
|
||||||
|
h ^= h >>> 13;
|
||||||
|
h *= m;
|
||||||
|
h ^= h >>> 15;
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates 32 bit hash from byte array with default seed value.
|
||||||
|
*
|
||||||
|
* @param data
|
||||||
|
* byte array to hash
|
||||||
|
* @param offset
|
||||||
|
* the start position in the array to hash
|
||||||
|
* @param len
|
||||||
|
* length of the array elements to hash
|
||||||
|
* @return 32 bit hash of the given array
|
||||||
|
*/
|
||||||
|
public static final int hash32(final byte[] data, int offset, int len) {
|
||||||
|
return MurmurHash2.hash(data, 0x9747b28c, offset, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final int hash(BytesRef br) {
|
||||||
|
return hash32(br.bytes, br.offset, br.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Hashing functions load-able via SPI service
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -17,4 +17,5 @@ org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat
|
||||||
org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat
|
org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat
|
||||||
org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
|
org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
|
||||||
org.apache.lucene.codecs.memory.MemoryPostingsFormat
|
org.apache.lucene.codecs.memory.MemoryPostingsFormat
|
||||||
|
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
|
||||||
org.apache.lucene.codecs.memory.DirectPostingsFormat
|
org.apache.lucene.codecs.memory.DirectPostingsFormat
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
org.apache.lucene.util.hash.MurmurHash2
|
|
@ -0,0 +1,77 @@
|
||||||
|
package org.apache.lucene.codecs.bloom;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
|
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
|
||||||
|
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.util.FuzzySet;
|
||||||
|
import org.apache.lucene.util.hash.MurmurHash2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class used for testing {@link BloomFilteringPostingsFormat} with a concrete
|
||||||
|
* delegate (Lucene40). Creates a Bloom filter on ALL fields and with tiny
|
||||||
|
* amounts of memory reserved for the filter. DO NOT USE IN A PRODUCTION
|
||||||
|
* APPLICATION This is not a realistic application of Bloom Filters as they
|
||||||
|
* ordinarily are larger and operate on only primary key type fields.
|
||||||
|
*/
|
||||||
|
public class TestBloomFilteredLucene40Postings extends PostingsFormat {
|
||||||
|
|
||||||
|
private BloomFilteringPostingsFormat delegate;
|
||||||
|
|
||||||
|
// Special class used to avoid OOM exceptions where Junit tests create many
|
||||||
|
// fields.
|
||||||
|
static class LowMemoryBloomFactory extends BloomFilterFactory {
|
||||||
|
@Override
|
||||||
|
public FuzzySet getSetForField(SegmentWriteState state,FieldInfo info) {
|
||||||
|
return FuzzySet.createSetBasedOnMaxMemory(1024, new MurmurHash2());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isSaturated(FuzzySet bloomFilter, FieldInfo fieldInfo) {
|
||||||
|
// For test purposes always maintain the BloomFilter - even past the point
|
||||||
|
// of usefulness when all bits are set
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public TestBloomFilteredLucene40Postings() {
|
||||||
|
super("TestBloomFilteredLucene40Postings");
|
||||||
|
delegate = new BloomFilteringPostingsFormat(new Lucene40PostingsFormat(),
|
||||||
|
new LowMemoryBloomFactory());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
|
||||||
|
throws IOException {
|
||||||
|
return delegate.fieldsConsumer(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FieldsProducer fieldsProducer(SegmentReadState state)
|
||||||
|
throws IOException {
|
||||||
|
return delegate.fieldsProducer(state);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Support for generating test indexes using the BloomFilteringPostingsFormat
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -29,6 +29,7 @@ import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.PostingsFormat;
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
|
import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
|
||||||
|
import org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
|
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
|
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
|
||||||
import org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds;
|
import org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds;
|
||||||
|
@ -98,6 +99,10 @@ public class RandomCodec extends Lucene40Codec {
|
||||||
new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
|
new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
|
||||||
// add pulsing again with (usually) different parameters
|
// add pulsing again with (usually) different parameters
|
||||||
new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
|
new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
|
||||||
|
//TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene40Postings to be constructed
|
||||||
|
//with a choice of concrete PostingsFormats. Maybe useful to have a generic means of marking and dealing
|
||||||
|
//with such "wrapper" classes?
|
||||||
|
new TestBloomFilteredLucene40Postings(),
|
||||||
new MockSepPostingsFormat(),
|
new MockSepPostingsFormat(),
|
||||||
new MockFixedIntBlockPostingsFormat(_TestUtil.nextInt(random, 1, 2000)),
|
new MockFixedIntBlockPostingsFormat(_TestUtil.nextInt(random, 1, 2000)),
|
||||||
new MockVariableIntBlockPostingsFormat( _TestUtil.nextInt(random, 1, 127)),
|
new MockVariableIntBlockPostingsFormat( _TestUtil.nextInt(random, 1, 127)),
|
||||||
|
|
|
@ -20,5 +20,6 @@ org.apache.lucene.codecs.mocksep.MockSepPostingsFormat
|
||||||
org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat
|
org.apache.lucene.codecs.nestedpulsing.NestedPulsingPostingsFormat
|
||||||
org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat
|
org.apache.lucene.codecs.ramonly.RAMOnlyPostingsFormat
|
||||||
org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds
|
org.apache.lucene.codecs.lucene40ords.Lucene40WithOrds
|
||||||
|
org.apache.lucene.codecs.bloom.TestBloomFilteredLucene40Postings
|
||||||
org.apache.lucene.codecs.asserting.AssertingPostingsFormat
|
org.apache.lucene.codecs.asserting.AssertingPostingsFormat
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue