mirror of https://github.com/apache/lucene.git
parent 7b9f212907
commit c7cf9e8e4f
CHANGES.txt:
@@ -118,7 +118,8 @@ Improvements
 
 Optimizations
 ---------------------
-(No changes)
+
+* LUCENE-9254: UniformSplit keeps FST off-heap. (Bruno Roustant)
 
 Bug Fixes
 ---------------------
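For orientation, the rest of the commit threads a new dictionaryOnHeap flag through the UniformSplit reader stack. A hedged usage sketch (not part of the commit itself; it only uses constructors and constants that appear in the hunks below), showing the off-heap default versus forcing the dictionary on-heap:

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat;
import org.apache.lucene.codecs.uniformsplit.UniformSplitTermsWriter;

class UniformSplitUsageSketch {
  static void example() {
    // Default settings: the FST terms dictionary stays off-heap (the new behavior).
    PostingsFormat offHeap = new UniformSplitPostingsFormat();

    // Explicitly forcing the dictionary on-heap via the new boolean parameter.
    PostingsFormat onHeap = new UniformSplitPostingsFormat(
        UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES,
        UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
        null,   // no block encoder
        null,   // no block decoder
        true);  // dictionaryOnHeap
  }
}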

FSTDictionary.java:
@@ -30,6 +30,7 @@ import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FSTCompiler;
+import org.apache.lucene.util.fst.OffHeapFSTStore;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 
@@ -56,24 +57,24 @@ public class FSTDictionary implements IndexDictionary {
 
   private static final long BASE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(FSTDictionary.class);
 
-  protected final FST<Long> dictionary;
+  protected final FST<Long> fst;
 
-  protected FSTDictionary(FST<Long> dictionary) {
-    this.dictionary = dictionary;
+  protected FSTDictionary(FST<Long> fst) {
+    this.fst = fst;
   }
 
   @Override
   public long ramBytesUsed() {
-    return BASE_RAM_USAGE + dictionary.ramBytesUsed();
+    return BASE_RAM_USAGE + fst.ramBytesUsed();
   }
 
   @Override
   public void write(DataOutput output, BlockEncoder blockEncoder) throws IOException {
     if (blockEncoder == null) {
-      dictionary.save(output);
+      fst.save(output);
     } else {
       ByteBuffersDataOutput bytesDataOutput = ByteBuffersDataOutput.newResettableInstance();
-      dictionary.save(bytesDataOutput);
+      fst.save(bytesDataOutput);
       BlockEncoder.WritableBytes encodedBytes = blockEncoder.encode(bytesDataOutput.toDataInput(), bytesDataOutput.size());
       output.writeVLong(encodedBytes.size());
       encodedBytes.writeTo(output);
@@ -84,7 +85,7 @@ public class FSTDictionary implements IndexDictionary {
    * Reads a {@link FSTDictionary} from the provided input.
    * @param blockDecoder The {@link BlockDecoder} to use for specific decoding; or null if none.
    */
-  protected static FSTDictionary read(DataInput input, BlockDecoder blockDecoder) throws IOException {
+  protected static FSTDictionary read(DataInput input, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException {
     DataInput fstDataInput;
     if (blockDecoder == null) {
       fstDataInput = input;
@@ -92,10 +93,14 @@ public class FSTDictionary implements IndexDictionary {
       long numBytes = input.readVLong();
       BytesRef decodedBytes = blockDecoder.decode(input, numBytes);
       fstDataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length);
+      // OffHeapFSTStore.init() requires a DataInput which is an instance of IndexInput.
+      // When the block is decoded we must load the FST on heap.
+      isFSTOnHeap = true;
     }
     PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
-    FST<Long> dictionary = new FST<>(fstDataInput, fstOutputs);
-    return new FSTDictionary(dictionary);
+    FST<Long> fst = isFSTOnHeap ? new FST<>(fstDataInput, fstOutputs)
+        : new FST<>(fstDataInput, fstOutputs, new OffHeapFSTStore());
+    return new FSTDictionary(fst);
   }
 
   @Override
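A minimal sketch of the decision the hunk above encodes (it reuses only the FST constructors that the patch itself calls): the off-heap store reads the FST bytes lazily from the index file, which is only possible when the input is an IndexInput; a decoded block exists only as heap bytes wrapped in a ByteArrayDataInput, so it has to be loaded on-heap.

import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.OffHeapFSTStore;
import org.apache.lucene.util.fst.PositiveIntOutputs;

class FSTLoadingSketch {
  // 'in' must be an IndexInput for the off-heap branch to work.
  static FST<Long> load(DataInput in, boolean onHeap) throws IOException {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    return onHeap
        ? new FST<>(in, outputs)                         // eager on-heap copy
        : new FST<>(in, outputs, new OffHeapFSTStore()); // lazy, bytes stay in the file
  }
}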
@@ -109,7 +114,7 @@ public class FSTDictionary implements IndexDictionary {
    */
   protected class Browser implements IndexDictionary.Browser {
 
-    protected final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(dictionary);
+    protected final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
 
     @Override
     public long seekBlock(BytesRef term) throws IOException {
@@ -127,16 +132,19 @@ public class FSTDictionary implements IndexDictionary {
 
     protected final IndexInput dictionaryInput;
     protected final BlockDecoder blockDecoder;
+    protected final boolean isFSTOnHeap;
 
     /**
-     * Lazy loaded immutable index dictionary (trie held in RAM).
+     * Lazy loaded immutable index dictionary FST.
+     * The FST is either kept off-heap, or held in RAM on-heap.
     */
    protected IndexDictionary dictionary;
 
-    public BrowserSupplier(IndexInput dictionaryInput, long startFilePointer, BlockDecoder blockDecoder) throws IOException {
+    public BrowserSupplier(IndexInput dictionaryInput, long dictionaryStartFP, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException {
       this.dictionaryInput = dictionaryInput.clone();
-      this.dictionaryInput.seek(startFilePointer);
+      this.dictionaryInput.seek(dictionaryStartFP);
       this.blockDecoder = blockDecoder;
+      this.isFSTOnHeap = isFSTOnHeap;
     }
 
     @Override
@@ -147,7 +155,7 @@ public class FSTDictionary implements IndexDictionary {
       if (dictionary == null) {
         synchronized (this) {
           if (dictionary == null) {
-            dictionary = read(dictionaryInput, blockDecoder);
+            dictionary = read(dictionaryInput, blockDecoder, isFSTOnHeap);
           }
         }
       }
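The hunk above is the classic double-checked lazy initialization. A self-contained sketch of the idiom for reference (field names follow the patch; note that the textbook form declares the lazily assigned field volatile for safe publication, which the sketch does, this is not something the hunk itself shows):

import java.io.IOException;

abstract class LazyDictionaryHolder {
  // volatile is what makes double-checked locking safe under the Java memory model.
  private volatile Object dictionary;

  Object get() throws IOException {
    if (dictionary == null) {            // first check, lock-free fast path
      synchronized (this) {
        if (dictionary == null) {        // second check, under the lock
          dictionary = read();           // runs at most once
        }
      }
    }
    return dictionary;
  }

  abstract Object read() throws IOException;
}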

FieldMetadata.java:
@@ -59,31 +59,38 @@ public class FieldMetadata implements Accountable {
   protected BytesRef lastTerm;
 
   /**
-   * Constructs a {@link FieldMetadata} used for writing the index. This {@link FieldMetadata} is mutable.
-   *
+   * Constructs field metadata for writing.
    * @param maxDoc The total number of documents in the segment being written.
    */
   public FieldMetadata(FieldInfo fieldInfo, int maxDoc) {
     this(fieldInfo, maxDoc, true);
   }
 
-  public FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable) {
-    this(fieldInfo, maxDoc, isMutable, -1, -1, null);
+  /**
+   * Constructs immutable virtual field metadata for reading.
+   */
+  public FieldMetadata(long dictionaryStartFP, long firstBlockStartFP, long lastBlockStartFP, BytesRef lastTerm) {
+    this(null, 0, false);
+    this.dictionaryStartFP = dictionaryStartFP;
+    this.firstBlockStartFP = firstBlockStartFP;
+    this.lastBlockStartFP = lastBlockStartFP;
+    this.lastTerm = lastTerm;
   }
 
   /**
+   * Constructs field metadata for reading or writing.
    * @param maxDoc The total number of documents in the segment being written.
    * @param isMutable Set true if this FieldMetadata is created for writing the index. Set false if it is used for reading the index.
    */
-  public FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable, long firstBlockStartFP, long lastBlockStartFP, BytesRef lastTerm) {
+  protected FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable) {
+    assert isMutable || maxDoc == 0;
     this.fieldInfo = fieldInfo;
     this.isMutable = isMutable;
     // docsSeen must not be set if this FieldMetadata is immutable, which means it is used for reading the index.
     this.docsSeen = isMutable ? new FixedBitSet(maxDoc) : null;
     this.dictionaryStartFP = -1;
-    this.firstBlockStartFP = firstBlockStartFP;
-    this.lastBlockStartFP = lastBlockStartFP;
-    this.lastTerm = lastTerm;
+    this.firstBlockStartFP = -1;
+    this.lastBlockStartFP = -1;
   }
 
   /**
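A small illustration of the new reading-side constructor (the values here are hypothetical placeholders, not real file pointers): a "virtual" FieldMetadata carries only file pointers and the last term, with no FieldInfo and no docsSeen bitset.

import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
import org.apache.lucene.util.BytesRef;

class VirtualFieldMetadataSketch {
  static FieldMetadata example() {
    // Hypothetical file pointers, for illustration only; real values come
    // from the index files, as in UnionFieldMetadataBuilder below.
    return new FieldMetadata(
        512L,   // dictionaryStartFP
        0L,     // firstBlockStartFP
        256L,   // lastBlockStartFP
        new BytesRef("zebra"));
  }
}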

IndexDictionary.java:
@@ -112,5 +112,6 @@ public interface IndexDictionary extends Accountable {
    * {@link org.apache.lucene.index.TermsEnum#seekExact} are called (it is not loaded for a direct
    * all-terms enumeration).
    */
-  interface BrowserSupplier extends IOSupplier<Browser>, Accountable {}
+  interface BrowserSupplier extends IOSupplier<Browser>, Accountable {
+  }
 }

UniformSplitPostingsFormat.java:
@@ -55,9 +55,14 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
   protected final int deltaNumLines;
   protected final BlockEncoder blockEncoder;
   protected final BlockDecoder blockDecoder;
+  protected final boolean dictionaryOnHeap;
 
+  /**
+   * Creates a {@link UniformSplitPostingsFormat} with default settings.
+   */
   public UniformSplitPostingsFormat() {
-    this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES, null, null);
+    this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
+        null, null, false);
   }
 
   /**
@@ -73,12 +78,20 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
    *                     It can be used for compression or encryption.
    * @param blockDecoder Optional block decoder, may be null if none.
    *                     It can be used for compression or encryption.
+   * @param dictionaryOnHeap Whether to force loading the terms dictionary on-heap. By default it is kept off-heap
+   *                         without impact on performance. If block encoding/decoding is used, the dictionary is
+   *                         always loaded on-heap, regardless of this parameter's value.
    */
-  public UniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) {
-    this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder);
+  public UniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder,
+                                    boolean dictionaryOnHeap) {
+    this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap);
   }
 
-  protected UniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) {
+  /**
+   * @see #UniformSplitPostingsFormat(int, int, BlockEncoder, BlockDecoder, boolean)
+   */
+  protected UniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
+                                       BlockDecoder blockDecoder, boolean dictionaryOnHeap) {
     super(name);
     UniformSplitTermsWriter.validateSettings(targetNumBlockLines, deltaNumLines);
     validateBlockEncoder(blockEncoder, blockDecoder);
@@ -86,6 +99,7 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
     this.deltaNumLines = deltaNumLines;
     this.blockEncoder = blockEncoder;
     this.blockDecoder = blockDecoder;
+    this.dictionaryOnHeap = dictionaryOnHeap;
   }
 
   @Override
@@ -125,7 +139,7 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
 
   protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
                                                          BlockDecoder blockDecoder) throws IOException {
-    return new UniformSplitTermsReader(postingsReader, state, blockDecoder);
+    return new UniformSplitTermsReader(postingsReader, state, blockDecoder, dictionaryOnHeap);
   }
 
   private static void validateBlockEncoder(BlockEncoder blockEncoder, BlockDecoder blockDecoder) {
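A hedged sketch of how an application might wire this format into an index; this wiring is not part of the patch, and it assumes the Lucene 8.x codec of that era (Lucene84Codec, whose getPostingsFormatForField method is overridable; adjust the codec class to your Lucene version):

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene84.Lucene84Codec;
import org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat;
import org.apache.lucene.index.IndexWriterConfig;

class UniformSplitCodecSketch {
  static IndexWriterConfig configWithUniformSplit() {
    PostingsFormat uniformSplit = new UniformSplitPostingsFormat();
    IndexWriterConfig config = new IndexWriterConfig();
    config.setCodec(new Lucene84Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        return uniformSplit; // use UniformSplit for every field
      }
    });
    return config;
  }
}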

UniformSplitTerms.java:
@@ -49,15 +49,6 @@ public class UniformSplitTerms extends Terms implements Accountable {
   protected final BlockDecoder blockDecoder;
   protected final IndexDictionary.BrowserSupplier dictionaryBrowserSupplier;
 
-  /**
-   * @param blockDecoder Optional block decoder, may be null if none. It can be used for decompression or decryption.
-   */
-  protected UniformSplitTerms(IndexInput dictionaryInput, IndexInput blockInput, FieldMetadata fieldMetadata,
-                              PostingsReaderBase postingsReader, BlockDecoder blockDecoder) throws IOException {
-    this(blockInput, fieldMetadata, postingsReader, blockDecoder,
-        new FSTDictionary.BrowserSupplier(dictionaryInput, fieldMetadata.getDictionaryStartFP(), blockDecoder));
-  }
-
   /**
    * @param blockDecoder Optional block decoder, may be null if none. It can be used for decompression or decryption.
    */

UniformSplitTermsReader.java:
@@ -65,21 +65,24 @@ public class UniformSplitTermsReader extends FieldsProducer {
   protected final Collection<String> sortedFieldNames;
 
   /**
    * @param blockDecoder Optional block decoder, may be null if none.
    *                     It can be used for decompression or decryption.
+   * @param dictionaryOnHeap Whether to force loading the terms dictionary on-heap. By default it is kept off-heap
+   *                         without impact on performance. If block encoding/decoding is used, the dictionary is
+   *                         always loaded on-heap, regardless of this parameter's value.
    */
-  public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
-    this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT,
+  public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
+                                 boolean dictionaryOnHeap) throws IOException {
+    this(postingsReader, state, blockDecoder, dictionaryOnHeap, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT,
         TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
   }
 
   /**
-   * @param blockDecoder Optional block decoder, may be null if none.
-   *                     It can be used for decompression or decryption.
+   * @see #UniformSplitTermsReader(PostingsReaderBase, SegmentReadState, BlockDecoder, boolean)
    */
-  protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
-                                    BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
-                                    String codecName, int versionStart, int versionCurrent,
+  protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
+                                    boolean dictionaryOnHeap, FieldMetadata.Serializer fieldMetadataReader,
+                                    String codecName, int versionStart, int versionCurrent,
                                     String termsBlocksExtension, String dictionaryExtension) throws IOException {
     IndexInput dictionaryInput = null;
     IndexInput blockInput = null;
@@ -108,7 +111,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
     this.blockInput = blockInput;
     this.dictionaryInput = dictionaryInput;
 
-    fillFieldMap(postingsReader, blockDecoder, dictionaryInput, blockInput, fieldMetadataCollection, state.fieldInfos);
+    fillFieldMap(postingsReader, state, blockDecoder, dictionaryOnHeap, dictionaryInput, blockInput, fieldMetadataCollection, state.fieldInfos);
 
     List<String> fieldNames = new ArrayList<>(fieldToTermsMap.keySet());
     Collections.sort(fieldNames);
@@ -122,15 +125,21 @@ public class UniformSplitTermsReader extends FieldsProducer {
     }
   }
 
-  protected void fillFieldMap(PostingsReaderBase postingsReader, BlockDecoder blockDecoder,
-                              IndexInput dictionaryInput, IndexInput blockInput,
-                              Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException {
+  protected void fillFieldMap(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
+                              boolean dictionaryOnHeap, IndexInput dictionaryInput, IndexInput blockInput,
+                              Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException {
     for (FieldMetadata fieldMetadata : fieldMetadataCollection) {
+      IndexDictionary.BrowserSupplier dictionaryBrowserSupplier = createDictionaryBrowserSupplier(state, dictionaryInput, fieldMetadata, blockDecoder, dictionaryOnHeap);
       fieldToTermsMap.put(fieldMetadata.getFieldInfo().name,
-          new UniformSplitTerms(dictionaryInput, blockInput, fieldMetadata, postingsReader, blockDecoder));
+          new UniformSplitTerms(blockInput, fieldMetadata, postingsReader, blockDecoder, dictionaryBrowserSupplier));
     }
   }
 
+  protected IndexDictionary.BrowserSupplier createDictionaryBrowserSupplier(SegmentReadState state, IndexInput dictionaryInput, FieldMetadata fieldMetadata,
+                                                                            BlockDecoder blockDecoder, boolean dictionaryOnHeap) throws IOException {
+    return new FSTDictionary.BrowserSupplier(dictionaryInput, fieldMetadata.getDictionaryStartFP(), blockDecoder, dictionaryOnHeap);
+  }
+
   /**
    * @param indexInput {@link IndexInput} must be positioned to the fields metadata
    *                   details by calling {@link #seekFieldsMetadata(IndexInput)} before this call.

STUniformSplitPostingsFormat.java:
@@ -58,16 +58,25 @@ public class STUniformSplitPostingsFormat extends UniformSplitPostingsFormat {
 
   public static final String NAME = "SharedTermsUniformSplit";
 
+  /**
+   * Creates a {@link STUniformSplitPostingsFormat} with default settings.
+   */
   public STUniformSplitPostingsFormat() {
-    this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES, null, null);
+    this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
+        null, null, false);
   }
 
-  public STUniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) {
-    this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder);
+  /**
+   * @see UniformSplitPostingsFormat#UniformSplitPostingsFormat(int, int, BlockEncoder, BlockDecoder, boolean)
+   */
+  public STUniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder,
+                                      boolean dictionaryOnHeap) {
+    this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap);
   }
 
-  protected STUniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) {
-    super(name, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder);
+  protected STUniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
+                                         BlockDecoder blockDecoder, boolean dictionaryOnHeap) {
+    super(name, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap);
   }
 
   @Override
@@ -79,6 +88,6 @@ public class STUniformSplitPostingsFormat extends UniformSplitPostingsFormat {
   @Override
   protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
                                                          BlockDecoder blockDecoder) throws IOException {
-    return new STUniformSplitTermsReader(postingsReader, state, blockDecoder);
+    return new STUniformSplitTermsReader(postingsReader, state, blockDecoder, dictionaryOnHeap);
   }
 }

STUniformSplitTerms.java:
@@ -43,7 +43,8 @@ public class STUniformSplitTerms extends UniformSplitTerms {
 
   protected STUniformSplitTerms(IndexInput blockInput, FieldMetadata fieldMetadata,
                                 FieldMetadata unionFieldMetadata, PostingsReaderBase postingsReader,
-                                BlockDecoder blockDecoder, FieldInfos fieldInfos, IndexDictionary.BrowserSupplier dictionaryBrowserSupplier) {
+                                BlockDecoder blockDecoder, FieldInfos fieldInfos,
+                                IndexDictionary.BrowserSupplier dictionaryBrowserSupplier) {
     super(blockInput, fieldMetadata, postingsReader, blockDecoder, dictionaryBrowserSupplier);
     this.unionFieldMetadata = unionFieldMetadata;
     this.fieldInfos = fieldInfos;

STUniformSplitTermsReader.java:
@@ -22,7 +22,6 @@ import java.util.Collection;
 
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.uniformsplit.BlockDecoder;
-import org.apache.lucene.codecs.uniformsplit.FSTDictionary;
 import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
 import org.apache.lucene.codecs.uniformsplit.IndexDictionary;
 import org.apache.lucene.codecs.uniformsplit.UniformSplitTerms;
@@ -46,26 +45,33 @@ import static org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPostingsFormat
  */
 public class STUniformSplitTermsReader extends UniformSplitTermsReader {
 
-  public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
-    this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE,
+  /**
+   * @see UniformSplitTermsReader#UniformSplitTermsReader(PostingsReaderBase, SegmentReadState, BlockDecoder, boolean)
+   */
+  public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
+                                   boolean dictionaryOnHeap) throws IOException {
+    this(postingsReader, state, blockDecoder, dictionaryOnHeap, FieldMetadata.Serializer.INSTANCE,
         NAME, VERSION_START, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
   }
 
-  protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
-                                      BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
+  /**
+   * @see UniformSplitTermsReader#UniformSplitTermsReader(PostingsReaderBase, SegmentReadState, BlockDecoder, boolean)
+   */
+  protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
+                                      boolean dictionaryOnHeap, FieldMetadata.Serializer fieldMetadataReader,
                                       String codecName, int versionStart, int versionCurrent,
                                       String termsBlocksExtension, String dictionaryExtension) throws IOException {
-    super(postingsReader, state, blockDecoder, fieldMetadataReader, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
+    super(postingsReader, state, blockDecoder, dictionaryOnHeap, fieldMetadataReader, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
   }
 
   @Override
-  protected void fillFieldMap(PostingsReaderBase postingsReader, BlockDecoder blockDecoder,
-                              IndexInput dictionaryInput, IndexInput blockInput,
+  protected void fillFieldMap(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
+                              boolean dictionaryOnHeap, IndexInput dictionaryInput, IndexInput blockInput,
                               Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException {
     if (!fieldMetadataCollection.isEmpty()) {
       FieldMetadata unionFieldMetadata = createUnionFieldMetadata(fieldMetadataCollection);
       // Share the same immutable dictionary between all fields.
-      IndexDictionary.BrowserSupplier dictionaryBrowserSupplier = new FSTDictionary.BrowserSupplier(dictionaryInput, fieldMetadataCollection.iterator().next().getDictionaryStartFP(), blockDecoder);
+      IndexDictionary.BrowserSupplier dictionaryBrowserSupplier = createDictionaryBrowserSupplier(state, dictionaryInput, unionFieldMetadata, blockDecoder, dictionaryOnHeap);
       for (FieldMetadata fieldMetadata : fieldMetadataCollection) {
         fieldToTermsMap.put(fieldMetadata.getFieldInfo().name,
             new STUniformSplitTerms(blockInput, fieldMetadata, unionFieldMetadata, postingsReader, blockDecoder, fieldInfos, dictionaryBrowserSupplier));

UnionFieldMetadataBuilder.java:
@@ -27,6 +27,7 @@ import org.apache.lucene.util.BytesRef;
  */
 public class UnionFieldMetadataBuilder {
 
+  private long dictionaryStartFP;
   private long minStartBlockFP;
   private long maxEndBlockFP;
   private BytesRef maxLastTerm;
@@ -36,13 +37,16 @@ public class UnionFieldMetadataBuilder {
   }
 
   public UnionFieldMetadataBuilder reset() {
-    maxEndBlockFP = Long.MIN_VALUE;
+    dictionaryStartFP = -1;
     minStartBlockFP = Long.MAX_VALUE;
+    maxEndBlockFP = Long.MIN_VALUE;
     maxLastTerm = null;
     return this;
   }
 
   public UnionFieldMetadataBuilder addFieldMetadata(FieldMetadata fieldMetadata) {
+    assert dictionaryStartFP == -1 || dictionaryStartFP == fieldMetadata.getDictionaryStartFP();
+    dictionaryStartFP = fieldMetadata.getDictionaryStartFP();
     minStartBlockFP = Math.min(minStartBlockFP, fieldMetadata.getFirstBlockStartFP());
     maxEndBlockFP = Math.max(maxEndBlockFP, fieldMetadata.getLastBlockStartFP());
     if (maxLastTerm == null || maxLastTerm.compareTo(fieldMetadata.getLastTerm()) < 0) {
@@ -55,6 +59,6 @@ public class UnionFieldMetadataBuilder {
     if (maxLastTerm == null) {
       throw new IllegalStateException("no field metadata was provided");
     }
-    return new FieldMetadata(null, 0, false, minStartBlockFP, maxEndBlockFP, maxLastTerm);
+    return new FieldMetadata(dictionaryStartFP, minStartBlockFP, maxEndBlockFP, maxLastTerm);
   }
 }
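A hedged usage sketch of the builder, mirroring how the shared-terms reader derives one dictionary-wide FieldMetadata from all per-field metadata; it assumes, from the class's builder shape, that the terminal method shown in the hunk above is named build(), and that the class lives in the sharedterms package alongside the reader that uses it:

import java.util.Collection;
import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
import org.apache.lucene.codecs.uniformsplit.sharedterms.UnionFieldMetadataBuilder;

class UnionMetadataSketch {
  // Folds all per-field metadata into one "virtual" FieldMetadata, as
  // STUniformSplitTermsReader.fillFieldMap() does for the shared dictionary.
  static FieldMetadata union(Collection<FieldMetadata> fieldMetadataCollection) {
    UnionFieldMetadataBuilder builder = new UnionFieldMetadataBuilder();
    for (FieldMetadata fieldMetadata : fieldMetadataCollection) {
      builder.addFieldMetadata(fieldMetadata);
    }
    return builder.build(); // assumption: the terminal method is build()
  }
}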

TestFSTDictionary.java:
@@ -140,6 +140,7 @@ public class TestFSTDictionary extends LuceneTestCase {
   private static FSTDictionary serializeAndReadDictionary(FSTDictionary srcDictionary, boolean shouldEncrypt) throws IOException {
     ByteBuffersDataOutput output = ByteBuffersDataOutput.newResettableInstance();
     srcDictionary.write(output, shouldEncrypt ? Rot13CypherTestUtil.getBlockEncoder() : null);
-    return FSTDictionary.read(output.toDataInput(), shouldEncrypt ? Rot13CypherTestUtil.getBlockDecoder() : null);
+    // We must load the FST on-heap since we use a ByteBuffersDataInput which is not an instance of IndexInput.
+    return FSTDictionary.read(output.toDataInput(), shouldEncrypt ? Rot13CypherTestUtil.getBlockDecoder() : null, true);
   }
 }

TestUniformSplitPostingFormat.java:
@@ -18,6 +18,7 @@
 package org.apache.lucene.codecs.uniformsplit;
 
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.index.BasePostingsFormatTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.junit.After;
@@ -28,10 +29,21 @@ import org.junit.Before;
  */
 public class TestUniformSplitPostingFormat extends BasePostingsFormatTestCase {
 
-  private final Codec codec = TestUtil.alwaysPostingsFormat(new UniformSplitRot13PostingsFormat());
-
+  protected final boolean checkEncoding;
+  protected final Codec codec;
   private boolean shouldCheckDecoderWasCalled = true;
 
+  public TestUniformSplitPostingFormat() {
+    checkEncoding = random().nextBoolean();
+    codec = TestUtil.alwaysPostingsFormat(getPostingsFormat());
+  }
+
+  protected PostingsFormat getPostingsFormat() {
+    return checkEncoding ? new UniformSplitRot13PostingsFormat()
+        : new UniformSplitPostingsFormat(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
+        null, null, random().nextBoolean());
+  }
+
   @Override
   protected Codec getCodec() {
     return codec;
@@ -44,10 +56,12 @@ public class TestUniformSplitPostingFormat extends BasePostingsFormatTestCase {
 
   @After
   public void checkEncodingCalled() {
-    assertTrue(UniformSplitRot13PostingsFormat.blocksEncoded);
-    assertTrue(UniformSplitRot13PostingsFormat.dictionaryEncoded);
-    if (shouldCheckDecoderWasCalled) {
-      assertTrue(UniformSplitRot13PostingsFormat.decoderCalled);
+    if (checkEncoding) {
+      assertTrue(UniformSplitRot13PostingsFormat.blocksEncoded);
+      assertTrue(UniformSplitRot13PostingsFormat.dictionaryEncoded);
+      if (shouldCheckDecoderWasCalled) {
+        assertTrue(UniformSplitRot13PostingsFormat.decoderCalled);
+      }
     }
   }
 

TestSTUniformSplitPostingFormat.java:
@@ -17,19 +17,19 @@
 
 package org.apache.lucene.codecs.uniformsplit.sharedterms;
 
-import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.uniformsplit.TestUniformSplitPostingFormat;
-import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.codecs.uniformsplit.UniformSplitTermsWriter;
 
 /**
  * Tests {@link STUniformSplitPostingsFormat} with block encoding using ROT13 cypher.
  */
 public class TestSTUniformSplitPostingFormat extends TestUniformSplitPostingFormat {
 
-  private final Codec codec = TestUtil.alwaysPostingsFormat(new STUniformSplitRot13PostingsFormat());
-
   @Override
-  protected Codec getCodec() {
-    return codec;
+  protected PostingsFormat getPostingsFormat() {
+    return checkEncoding ? new STUniformSplitRot13PostingsFormat()
+        : new STUniformSplitPostingsFormat(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
+        null, null, random().nextBoolean());
   }
 }

UniformSplitRot13PostingsFormat.java:
@@ -41,13 +41,15 @@ public class UniformSplitRot13PostingsFormat extends PostingsFormat {
   public static volatile boolean decoderCalled;
   public static volatile boolean blocksEncoded;
   public static volatile boolean dictionaryEncoded;
+  protected final boolean dictionaryOnHeap;
 
   public UniformSplitRot13PostingsFormat() {
-    this("UniformSplitRot13");
+    this("UniformSplitRot13", false);
   }
 
-  protected UniformSplitRot13PostingsFormat(String name) {
+  protected UniformSplitRot13PostingsFormat(String name, boolean dictionaryOnHeap) {
     super(name);
+    this.dictionaryOnHeap = dictionaryOnHeap;
   }
 
   public static void resetEncodingFlags() {
@@ -135,7 +137,7 @@ public class UniformSplitRot13PostingsFormat extends PostingsFormat {
   }
 
   protected FieldsProducer createFieldsProducer(SegmentReadState segmentReadState, PostingsReaderBase postingsReader) throws IOException {
-    return new UniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder());
+    return new UniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder(), dictionaryOnHeap);
   }
 
   protected BlockDecoder getBlockDecoder() {

STUniformSplitRot13PostingsFormat.java:
@@ -35,7 +35,7 @@ import org.apache.lucene.index.SegmentWriteState;
 public class STUniformSplitRot13PostingsFormat extends UniformSplitRot13PostingsFormat {
 
   public STUniformSplitRot13PostingsFormat() {
-    super("STUniformSplitRot13");
+    super("STUniformSplitRot13", false);
   }
 
   protected FieldsConsumer createFieldsConsumer(SegmentWriteState segmentWriteState, PostingsWriterBase postingsWriter) throws IOException {
@@ -54,6 +54,6 @@ public class STUniformSplitRot13PostingsFormat extends UniformSplitRot13PostingsFormat {
   }
 
   protected FieldsProducer createFieldsProducer(SegmentReadState segmentReadState, PostingsReaderBase postingsReader) throws IOException {
-    return new STUniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder());
+    return new STUniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder(), dictionaryOnHeap);
   }
 }