LUCENE-9254: UniformSplit supports FST off-heap.

Closes #1301
This commit is contained in:
Bruno Roustant 2020-02-27 12:56:58 +01:00
parent 7b9f212907
commit c7cf9e8e4f
No known key found for this signature in database
GPG Key ID: CD28DABB95360525
16 changed files with 156 additions and 88 deletions

View File

@ -118,7 +118,8 @@ Improvements
Optimizations Optimizations
--------------------- ---------------------
(No changes)
* LUCENE-9254: UniformSplit keeps FST off-heap. (Bruno Roustant)
Bug Fixes Bug Fixes
--------------------- ---------------------

View File

@ -30,6 +30,7 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.OffHeapFSTStore;
import org.apache.lucene.util.fst.PositiveIntOutputs; import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.fst.Util;
@ -56,24 +57,24 @@ public class FSTDictionary implements IndexDictionary {
private static final long BASE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(FSTDictionary.class); private static final long BASE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(FSTDictionary.class);
protected final FST<Long> dictionary; protected final FST<Long> fst;
protected FSTDictionary(FST<Long> dictionary) { protected FSTDictionary(FST<Long> fst) {
this.dictionary = dictionary; this.fst = fst;
} }
@Override @Override
public long ramBytesUsed() { public long ramBytesUsed() {
return BASE_RAM_USAGE + dictionary.ramBytesUsed(); return BASE_RAM_USAGE + fst.ramBytesUsed();
} }
@Override @Override
public void write(DataOutput output, BlockEncoder blockEncoder) throws IOException { public void write(DataOutput output, BlockEncoder blockEncoder) throws IOException {
if (blockEncoder == null) { if (blockEncoder == null) {
dictionary.save(output); fst.save(output);
} else { } else {
ByteBuffersDataOutput bytesDataOutput = ByteBuffersDataOutput.newResettableInstance(); ByteBuffersDataOutput bytesDataOutput = ByteBuffersDataOutput.newResettableInstance();
dictionary.save(bytesDataOutput); fst.save(bytesDataOutput);
BlockEncoder.WritableBytes encodedBytes = blockEncoder.encode(bytesDataOutput.toDataInput(), bytesDataOutput.size()); BlockEncoder.WritableBytes encodedBytes = blockEncoder.encode(bytesDataOutput.toDataInput(), bytesDataOutput.size());
output.writeVLong(encodedBytes.size()); output.writeVLong(encodedBytes.size());
encodedBytes.writeTo(output); encodedBytes.writeTo(output);
@ -84,7 +85,7 @@ public class FSTDictionary implements IndexDictionary {
* Reads a {@link FSTDictionary} from the provided input. * Reads a {@link FSTDictionary} from the provided input.
* @param blockDecoder The {@link BlockDecoder} to use for specific decoding; or null if none. * @param blockDecoder The {@link BlockDecoder} to use for specific decoding; or null if none.
*/ */
protected static FSTDictionary read(DataInput input, BlockDecoder blockDecoder) throws IOException { protected static FSTDictionary read(DataInput input, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException {
DataInput fstDataInput; DataInput fstDataInput;
if (blockDecoder == null) { if (blockDecoder == null) {
fstDataInput = input; fstDataInput = input;
@ -92,10 +93,14 @@ public class FSTDictionary implements IndexDictionary {
long numBytes = input.readVLong(); long numBytes = input.readVLong();
BytesRef decodedBytes = blockDecoder.decode(input, numBytes); BytesRef decodedBytes = blockDecoder.decode(input, numBytes);
fstDataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length); fstDataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length);
// OffHeapFSTStore.init() requires a DataInput which is an instance of IndexInput.
// When the block is decoded we must load the FST on heap.
isFSTOnHeap = true;
} }
PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton(); PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
FST<Long> dictionary = new FST<>(fstDataInput, fstOutputs); FST<Long> fst = isFSTOnHeap ? new FST<>(fstDataInput, fstOutputs)
return new FSTDictionary(dictionary); : new FST<>(fstDataInput, fstOutputs, new OffHeapFSTStore());
return new FSTDictionary(fst);
} }
@Override @Override
@ -109,7 +114,7 @@ public class FSTDictionary implements IndexDictionary {
*/ */
protected class Browser implements IndexDictionary.Browser { protected class Browser implements IndexDictionary.Browser {
protected final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(dictionary); protected final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
@Override @Override
public long seekBlock(BytesRef term) throws IOException { public long seekBlock(BytesRef term) throws IOException {
@ -127,16 +132,19 @@ public class FSTDictionary implements IndexDictionary {
protected final IndexInput dictionaryInput; protected final IndexInput dictionaryInput;
protected final BlockDecoder blockDecoder; protected final BlockDecoder blockDecoder;
protected final boolean isFSTOnHeap;
/** /**
* Lazy loaded immutable index dictionary (trie hold in RAM). * Lazy loaded immutable index dictionary FST.
* The FST is either kept off-heap, or held in RAM on-heap.
*/ */
protected IndexDictionary dictionary; protected IndexDictionary dictionary;
public BrowserSupplier(IndexInput dictionaryInput, long startFilePointer, BlockDecoder blockDecoder) throws IOException { public BrowserSupplier(IndexInput dictionaryInput, long dictionaryStartFP, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException {
this.dictionaryInput = dictionaryInput.clone(); this.dictionaryInput = dictionaryInput.clone();
this.dictionaryInput.seek(startFilePointer); this.dictionaryInput.seek(dictionaryStartFP);
this.blockDecoder = blockDecoder; this.blockDecoder = blockDecoder;
this.isFSTOnHeap = isFSTOnHeap;
} }
@Override @Override
@ -147,7 +155,7 @@ public class FSTDictionary implements IndexDictionary {
if (dictionary == null) { if (dictionary == null) {
synchronized (this) { synchronized (this) {
if (dictionary == null) { if (dictionary == null) {
dictionary = read(dictionaryInput, blockDecoder); dictionary = read(dictionaryInput, blockDecoder, isFSTOnHeap);
} }
} }
} }

View File

@ -59,31 +59,38 @@ public class FieldMetadata implements Accountable {
protected BytesRef lastTerm; protected BytesRef lastTerm;
/** /**
* Constructs a {@link FieldMetadata} used for writing the index. This {@link FieldMetadata} is mutable. * Constructs field metadata for writing.
*
* @param maxDoc The total number of documents in the segment being written. * @param maxDoc The total number of documents in the segment being written.
*/ */
public FieldMetadata(FieldInfo fieldInfo, int maxDoc) { public FieldMetadata(FieldInfo fieldInfo, int maxDoc) {
this(fieldInfo, maxDoc, true); this(fieldInfo, maxDoc, true);
} }
public FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable) { /**
this(fieldInfo, maxDoc, isMutable, -1, -1, null); * Constructs immutable virtual field metadata for reading.
*/
public FieldMetadata(long dictionaryStartFP, long firstBlockStartFP, long lastBlockStartFP, BytesRef lastTerm) {
this(null, 0, false);
this.dictionaryStartFP = dictionaryStartFP;
this.firstBlockStartFP = firstBlockStartFP;
this.lastBlockStartFP = lastBlockStartFP;
this.lastTerm = lastTerm;
} }
/** /**
* Constructs field metadata for reading or writing.
* @param maxDoc The total number of documents in the segment being written.
* @param isMutable Set true if this FieldMetadata is created for writing the index. Set false if it is used for reading the index. * @param isMutable Set true if this FieldMetadata is created for writing the index. Set false if it is used for reading the index.
*/ */
public FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable, long firstBlockStartFP, long lastBlockStartFP, BytesRef lastTerm) { protected FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable) {
assert isMutable || maxDoc == 0; assert isMutable || maxDoc == 0;
this.fieldInfo = fieldInfo; this.fieldInfo = fieldInfo;
this.isMutable = isMutable; this.isMutable = isMutable;
// docsSeen must not be set if this FieldMetadata is immutable, that means it is used for reading the index. // docsSeen must not be set if this FieldMetadata is immutable, that means it is used for reading the index.
this.docsSeen = isMutable ? new FixedBitSet(maxDoc) : null; this.docsSeen = isMutable ? new FixedBitSet(maxDoc) : null;
this.dictionaryStartFP = -1; this.dictionaryStartFP = -1;
this.firstBlockStartFP = firstBlockStartFP; this.firstBlockStartFP = -1;
this.lastBlockStartFP = lastBlockStartFP; this.lastBlockStartFP = -1;
this.lastTerm = lastTerm;
} }
/** /**

View File

@ -112,5 +112,6 @@ public interface IndexDictionary extends Accountable {
* {@link org.apache.lucene.index.TermsEnum#seekExact} are called (it is not loaded for a direct * {@link org.apache.lucene.index.TermsEnum#seekExact} are called (it is not loaded for a direct
* all-terms enumeration). * all-terms enumeration).
*/ */
interface BrowserSupplier extends IOSupplier<Browser>, Accountable {} interface BrowserSupplier extends IOSupplier<Browser>, Accountable {
}
} }

View File

@ -55,9 +55,14 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
protected final int deltaNumLines; protected final int deltaNumLines;
protected final BlockEncoder blockEncoder; protected final BlockEncoder blockEncoder;
protected final BlockDecoder blockDecoder; protected final BlockDecoder blockDecoder;
protected final boolean dictionaryOnHeap;
/**
* Creates a {@link UniformSplitPostingsFormat} with default settings.
*/
public UniformSplitPostingsFormat() { public UniformSplitPostingsFormat() {
this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES, null, null); this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
null, null, false);
} }
/** /**
@ -73,12 +78,20 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
* It can be used for compression or encryption. * It can be used for compression or encryption.
* @param blockDecoder Optional block decoder, may be null if none. * @param blockDecoder Optional block decoder, may be null if none.
* It can be used for compression or encryption. * It can be used for compression or encryption.
* @param dictionaryOnHeap Whether to force loading the terms dictionary on-heap. By default it is kept off-heap without
* impact on performance. If block encoding/decoding is used, then the dictionary is always
* loaded on-heap regardless of this parameter's value.
*/ */
public UniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) { public UniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder,
this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder); boolean dictionaryOnHeap) {
this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap);
} }
protected UniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) { /**
* @see #UniformSplitPostingsFormat(int, int, BlockEncoder, BlockDecoder, boolean)
*/
protected UniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
BlockDecoder blockDecoder, boolean dictionaryOnHeap) {
super(name); super(name);
UniformSplitTermsWriter.validateSettings(targetNumBlockLines, deltaNumLines); UniformSplitTermsWriter.validateSettings(targetNumBlockLines, deltaNumLines);
validateBlockEncoder(blockEncoder, blockDecoder); validateBlockEncoder(blockEncoder, blockDecoder);
@ -86,6 +99,7 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
this.deltaNumLines = deltaNumLines; this.deltaNumLines = deltaNumLines;
this.blockEncoder = blockEncoder; this.blockEncoder = blockEncoder;
this.blockDecoder = blockDecoder; this.blockDecoder = blockDecoder;
this.dictionaryOnHeap = dictionaryOnHeap;
} }
@Override @Override
@ -125,7 +139,7 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder) throws IOException { BlockDecoder blockDecoder) throws IOException {
return new UniformSplitTermsReader(postingsReader, state, blockDecoder); return new UniformSplitTermsReader(postingsReader, state, blockDecoder, dictionaryOnHeap);
} }
private static void validateBlockEncoder(BlockEncoder blockEncoder, BlockDecoder blockDecoder) { private static void validateBlockEncoder(BlockEncoder blockEncoder, BlockDecoder blockDecoder) {

View File

@ -49,15 +49,6 @@ public class UniformSplitTerms extends Terms implements Accountable {
protected final BlockDecoder blockDecoder; protected final BlockDecoder blockDecoder;
protected final IndexDictionary.BrowserSupplier dictionaryBrowserSupplier; protected final IndexDictionary.BrowserSupplier dictionaryBrowserSupplier;
/**
* @param blockDecoder Optional block decoder, may be null if none. It can be used for decompression or decryption.
*/
protected UniformSplitTerms(IndexInput dictionaryInput, IndexInput blockInput, FieldMetadata fieldMetadata,
PostingsReaderBase postingsReader, BlockDecoder blockDecoder) throws IOException {
this(blockInput, fieldMetadata, postingsReader, blockDecoder,
new FSTDictionary.BrowserSupplier(dictionaryInput, fieldMetadata.getDictionaryStartFP(), blockDecoder));
}
/** /**
* @param blockDecoder Optional block decoder, may be null if none. It can be used for decompression or decryption. * @param blockDecoder Optional block decoder, may be null if none. It can be used for decompression or decryption.
*/ */

View File

@ -65,21 +65,24 @@ public class UniformSplitTermsReader extends FieldsProducer {
protected final Collection<String> sortedFieldNames; protected final Collection<String> sortedFieldNames;
/** /**
* @param blockDecoder Optional block decoder, may be null if none. * @param blockDecoder Optional block decoder, may be null if none.
* It can be used for decompression or decryption. * It can be used for decompression or decryption.
* @param dictionaryOnHeap Whether to force loading the terms dictionary on-heap. By default it is kept off-heap without
* impact on performance. If block encoding/decoding is used, then the dictionary is always
* loaded on-heap regardless of this parameter's value.
*/ */
public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException { public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT, boolean dictionaryOnHeap) throws IOException {
this(postingsReader, state, blockDecoder, dictionaryOnHeap, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT,
TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION); TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
} }
/** /**
* @param blockDecoder Optional block decoder, may be null if none. * @see #UniformSplitTermsReader(PostingsReaderBase, SegmentReadState, BlockDecoder, boolean)
* It can be used for decompression or decryption.
*/ */
protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader, boolean dictionaryOnHeap, FieldMetadata.Serializer fieldMetadataReader,
String codecName, int versionStart, int versionCurrent, String codecName, int versionStart, int versionCurrent,
String termsBlocksExtension, String dictionaryExtension) throws IOException { String termsBlocksExtension, String dictionaryExtension) throws IOException {
IndexInput dictionaryInput = null; IndexInput dictionaryInput = null;
IndexInput blockInput = null; IndexInput blockInput = null;
@ -108,7 +111,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
this.blockInput = blockInput; this.blockInput = blockInput;
this.dictionaryInput = dictionaryInput; this.dictionaryInput = dictionaryInput;
fillFieldMap(postingsReader, blockDecoder, dictionaryInput, blockInput, fieldMetadataCollection, state.fieldInfos); fillFieldMap(postingsReader, state, blockDecoder, dictionaryOnHeap, dictionaryInput, blockInput, fieldMetadataCollection, state.fieldInfos);
List<String> fieldNames = new ArrayList<>(fieldToTermsMap.keySet()); List<String> fieldNames = new ArrayList<>(fieldToTermsMap.keySet());
Collections.sort(fieldNames); Collections.sort(fieldNames);
@ -122,15 +125,21 @@ public class UniformSplitTermsReader extends FieldsProducer {
} }
} }
protected void fillFieldMap(PostingsReaderBase postingsReader, BlockDecoder blockDecoder, protected void fillFieldMap(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
IndexInput dictionaryInput, IndexInput blockInput, boolean dictionaryOnHeap, IndexInput dictionaryInput, IndexInput blockInput,
Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException { Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException {
for (FieldMetadata fieldMetadata : fieldMetadataCollection) { for (FieldMetadata fieldMetadata : fieldMetadataCollection) {
IndexDictionary.BrowserSupplier dictionaryBrowserSupplier = createDictionaryBrowserSupplier(state, dictionaryInput, fieldMetadata, blockDecoder, dictionaryOnHeap);
fieldToTermsMap.put(fieldMetadata.getFieldInfo().name, fieldToTermsMap.put(fieldMetadata.getFieldInfo().name,
new UniformSplitTerms(dictionaryInput, blockInput, fieldMetadata, postingsReader, blockDecoder)); new UniformSplitTerms(blockInput, fieldMetadata, postingsReader, blockDecoder, dictionaryBrowserSupplier));
} }
} }
protected IndexDictionary.BrowserSupplier createDictionaryBrowserSupplier(SegmentReadState state, IndexInput dictionaryInput, FieldMetadata fieldMetadata,
BlockDecoder blockDecoder, boolean dictionaryOnHeap) throws IOException {
return new FSTDictionary.BrowserSupplier(dictionaryInput, fieldMetadata.getDictionaryStartFP(), blockDecoder, dictionaryOnHeap);
}
/** /**
* @param indexInput {@link IndexInput} must be positioned to the fields metadata * @param indexInput {@link IndexInput} must be positioned to the fields metadata
* details by calling {@link #seekFieldsMetadata(IndexInput)} before this call. * details by calling {@link #seekFieldsMetadata(IndexInput)} before this call.

View File

@ -58,16 +58,25 @@ public class STUniformSplitPostingsFormat extends UniformSplitPostingsFormat {
public static final String NAME = "SharedTermsUniformSplit"; public static final String NAME = "SharedTermsUniformSplit";
/**
* Creates a {@link STUniformSplitPostingsFormat} with default settings.
*/
public STUniformSplitPostingsFormat() { public STUniformSplitPostingsFormat() {
this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES, null, null); this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
null, null, false);
} }
public STUniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) { /**
this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder); * @see UniformSplitPostingsFormat#UniformSplitPostingsFormat(int, int, BlockEncoder, BlockDecoder, boolean)
*/
public STUniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder,
boolean dictionaryOnHeap) {
this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap);
} }
protected STUniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) { protected STUniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
super(name, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder); BlockDecoder blockDecoder, boolean dictionaryOnHeap) {
super(name, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap);
} }
@Override @Override
@ -79,6 +88,6 @@ public class STUniformSplitPostingsFormat extends UniformSplitPostingsFormat {
@Override @Override
protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder) throws IOException { BlockDecoder blockDecoder) throws IOException {
return new STUniformSplitTermsReader(postingsReader, state, blockDecoder); return new STUniformSplitTermsReader(postingsReader, state, blockDecoder, dictionaryOnHeap);
} }
} }

View File

@ -43,7 +43,8 @@ public class STUniformSplitTerms extends UniformSplitTerms {
protected STUniformSplitTerms(IndexInput blockInput, FieldMetadata fieldMetadata, protected STUniformSplitTerms(IndexInput blockInput, FieldMetadata fieldMetadata,
FieldMetadata unionFieldMetadata, PostingsReaderBase postingsReader, FieldMetadata unionFieldMetadata, PostingsReaderBase postingsReader,
BlockDecoder blockDecoder, FieldInfos fieldInfos, IndexDictionary.BrowserSupplier dictionaryBrowserSupplier) { BlockDecoder blockDecoder, FieldInfos fieldInfos,
IndexDictionary.BrowserSupplier dictionaryBrowserSupplier) {
super(blockInput, fieldMetadata, postingsReader, blockDecoder, dictionaryBrowserSupplier); super(blockInput, fieldMetadata, postingsReader, blockDecoder, dictionaryBrowserSupplier);
this.unionFieldMetadata = unionFieldMetadata; this.unionFieldMetadata = unionFieldMetadata;
this.fieldInfos = fieldInfos; this.fieldInfos = fieldInfos;

View File

@ -22,7 +22,6 @@ import java.util.Collection;
import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.uniformsplit.BlockDecoder; import org.apache.lucene.codecs.uniformsplit.BlockDecoder;
import org.apache.lucene.codecs.uniformsplit.FSTDictionary;
import org.apache.lucene.codecs.uniformsplit.FieldMetadata; import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
import org.apache.lucene.codecs.uniformsplit.IndexDictionary; import org.apache.lucene.codecs.uniformsplit.IndexDictionary;
import org.apache.lucene.codecs.uniformsplit.UniformSplitTerms; import org.apache.lucene.codecs.uniformsplit.UniformSplitTerms;
@ -46,26 +45,33 @@ import static org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPo
*/ */
public class STUniformSplitTermsReader extends UniformSplitTermsReader { public class STUniformSplitTermsReader extends UniformSplitTermsReader {
public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException { /**
this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE, * @see UniformSplitTermsReader#UniformSplitTermsReader(PostingsReaderBase, SegmentReadState, BlockDecoder, boolean)
*/
public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap) throws IOException {
this(postingsReader, state, blockDecoder, dictionaryOnHeap, FieldMetadata.Serializer.INSTANCE,
NAME, VERSION_START, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION); NAME, VERSION_START, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
} }
protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, /**
BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader, * @see UniformSplitTermsReader#UniformSplitTermsReader(PostingsReaderBase, SegmentReadState, BlockDecoder, boolean)
*/
protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap, FieldMetadata.Serializer fieldMetadataReader,
String codecName, int versionStart, int versionCurrent, String codecName, int versionStart, int versionCurrent,
String termsBlocksExtension, String dictionaryExtension) throws IOException { String termsBlocksExtension, String dictionaryExtension) throws IOException {
super(postingsReader, state, blockDecoder, fieldMetadataReader, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension); super(postingsReader, state, blockDecoder, dictionaryOnHeap, fieldMetadataReader, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
} }
@Override @Override
protected void fillFieldMap(PostingsReaderBase postingsReader, BlockDecoder blockDecoder, protected void fillFieldMap(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
IndexInput dictionaryInput, IndexInput blockInput, boolean dictionaryOnHeap, IndexInput dictionaryInput, IndexInput blockInput,
Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException { Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException {
if (!fieldMetadataCollection.isEmpty()) { if (!fieldMetadataCollection.isEmpty()) {
FieldMetadata unionFieldMetadata = createUnionFieldMetadata(fieldMetadataCollection); FieldMetadata unionFieldMetadata = createUnionFieldMetadata(fieldMetadataCollection);
// Share the same immutable dictionary between all fields. // Share the same immutable dictionary between all fields.
IndexDictionary.BrowserSupplier dictionaryBrowserSupplier = new FSTDictionary.BrowserSupplier(dictionaryInput, fieldMetadataCollection.iterator().next().getDictionaryStartFP(), blockDecoder); IndexDictionary.BrowserSupplier dictionaryBrowserSupplier = createDictionaryBrowserSupplier(state, dictionaryInput, unionFieldMetadata, blockDecoder, dictionaryOnHeap);
for (FieldMetadata fieldMetadata : fieldMetadataCollection) { for (FieldMetadata fieldMetadata : fieldMetadataCollection) {
fieldToTermsMap.put(fieldMetadata.getFieldInfo().name, fieldToTermsMap.put(fieldMetadata.getFieldInfo().name,
new STUniformSplitTerms(blockInput, fieldMetadata, unionFieldMetadata, postingsReader, blockDecoder, fieldInfos, dictionaryBrowserSupplier)); new STUniformSplitTerms(blockInput, fieldMetadata, unionFieldMetadata, postingsReader, blockDecoder, fieldInfos, dictionaryBrowserSupplier));

View File

@ -27,6 +27,7 @@ import org.apache.lucene.util.BytesRef;
*/ */
public class UnionFieldMetadataBuilder { public class UnionFieldMetadataBuilder {
private long dictionaryStartFP;
private long minStartBlockFP; private long minStartBlockFP;
private long maxEndBlockFP; private long maxEndBlockFP;
private BytesRef maxLastTerm; private BytesRef maxLastTerm;
@ -36,13 +37,16 @@ public class UnionFieldMetadataBuilder {
} }
public UnionFieldMetadataBuilder reset() { public UnionFieldMetadataBuilder reset() {
maxEndBlockFP = Long.MIN_VALUE; dictionaryStartFP = -1;
minStartBlockFP = Long.MAX_VALUE; minStartBlockFP = Long.MAX_VALUE;
maxEndBlockFP = Long.MIN_VALUE;
maxLastTerm = null; maxLastTerm = null;
return this; return this;
} }
public UnionFieldMetadataBuilder addFieldMetadata(FieldMetadata fieldMetadata) { public UnionFieldMetadataBuilder addFieldMetadata(FieldMetadata fieldMetadata) {
assert dictionaryStartFP == -1 || dictionaryStartFP == fieldMetadata.getDictionaryStartFP();
dictionaryStartFP = fieldMetadata.getDictionaryStartFP();
minStartBlockFP = Math.min(minStartBlockFP, fieldMetadata.getFirstBlockStartFP()); minStartBlockFP = Math.min(minStartBlockFP, fieldMetadata.getFirstBlockStartFP());
maxEndBlockFP = Math.max(maxEndBlockFP, fieldMetadata.getLastBlockStartFP()); maxEndBlockFP = Math.max(maxEndBlockFP, fieldMetadata.getLastBlockStartFP());
if (maxLastTerm == null || maxLastTerm.compareTo(fieldMetadata.getLastTerm()) < 0) { if (maxLastTerm == null || maxLastTerm.compareTo(fieldMetadata.getLastTerm()) < 0) {
@ -55,6 +59,6 @@ public class UnionFieldMetadataBuilder {
if (maxLastTerm == null) { if (maxLastTerm == null) {
throw new IllegalStateException("no field metadata was provided"); throw new IllegalStateException("no field metadata was provided");
} }
return new FieldMetadata(null, 0, false, minStartBlockFP, maxEndBlockFP, maxLastTerm); return new FieldMetadata(dictionaryStartFP, minStartBlockFP, maxEndBlockFP, maxLastTerm);
} }
} }

View File

@ -140,6 +140,7 @@ public class TestFSTDictionary extends LuceneTestCase {
private static FSTDictionary serializeAndReadDictionary(FSTDictionary srcDictionary, boolean shouldEncrypt) throws IOException { private static FSTDictionary serializeAndReadDictionary(FSTDictionary srcDictionary, boolean shouldEncrypt) throws IOException {
ByteBuffersDataOutput output = ByteBuffersDataOutput.newResettableInstance(); ByteBuffersDataOutput output = ByteBuffersDataOutput.newResettableInstance();
srcDictionary.write(output, shouldEncrypt ? Rot13CypherTestUtil.getBlockEncoder() : null); srcDictionary.write(output, shouldEncrypt ? Rot13CypherTestUtil.getBlockEncoder() : null);
return FSTDictionary.read(output.toDataInput(), shouldEncrypt ? Rot13CypherTestUtil.getBlockDecoder() : null); // We must load the FST on-heap since we use a ByteBuffersDataInput which is not an instance of IndexInput.
return FSTDictionary.read(output.toDataInput(), shouldEncrypt ? Rot13CypherTestUtil.getBlockDecoder() : null, true);
} }
} }

View File

@ -18,6 +18,7 @@
package org.apache.lucene.codecs.uniformsplit; package org.apache.lucene.codecs.uniformsplit;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.BasePostingsFormatTestCase; import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.junit.After; import org.junit.After;
@ -28,10 +29,21 @@ import org.junit.Before;
*/ */
public class TestUniformSplitPostingFormat extends BasePostingsFormatTestCase { public class TestUniformSplitPostingFormat extends BasePostingsFormatTestCase {
private final Codec codec = TestUtil.alwaysPostingsFormat(new UniformSplitRot13PostingsFormat()); protected final boolean checkEncoding;
protected final Codec codec;
private boolean shouldCheckDecoderWasCalled = true; private boolean shouldCheckDecoderWasCalled = true;
public TestUniformSplitPostingFormat() {
checkEncoding = random().nextBoolean();
codec = TestUtil.alwaysPostingsFormat(getPostingsFormat());
}
protected PostingsFormat getPostingsFormat() {
return checkEncoding ? new UniformSplitRot13PostingsFormat()
: new UniformSplitPostingsFormat(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
null, null, random().nextBoolean());
}
@Override @Override
protected Codec getCodec() { protected Codec getCodec() {
return codec; return codec;
@ -44,10 +56,12 @@ public class TestUniformSplitPostingFormat extends BasePostingsFormatTestCase {
@After @After
public void checkEncodingCalled() { public void checkEncodingCalled() {
assertTrue(UniformSplitRot13PostingsFormat.blocksEncoded); if (checkEncoding) {
assertTrue(UniformSplitRot13PostingsFormat.dictionaryEncoded); assertTrue(UniformSplitRot13PostingsFormat.blocksEncoded);
if (shouldCheckDecoderWasCalled) { assertTrue(UniformSplitRot13PostingsFormat.dictionaryEncoded);
assertTrue(UniformSplitRot13PostingsFormat.decoderCalled); if (shouldCheckDecoderWasCalled) {
assertTrue(UniformSplitRot13PostingsFormat.decoderCalled);
}
} }
} }

View File

@ -17,19 +17,19 @@
package org.apache.lucene.codecs.uniformsplit.sharedterms; package org.apache.lucene.codecs.uniformsplit.sharedterms;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.uniformsplit.TestUniformSplitPostingFormat; import org.apache.lucene.codecs.uniformsplit.TestUniformSplitPostingFormat;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.codecs.uniformsplit.UniformSplitTermsWriter;
/** /**
* Tests {@link STUniformSplitPostingsFormat} with block encoding using ROT13 cypher. * Tests {@link STUniformSplitPostingsFormat} with block encoding using ROT13 cypher.
*/ */
public class TestSTUniformSplitPostingFormat extends TestUniformSplitPostingFormat { public class TestSTUniformSplitPostingFormat extends TestUniformSplitPostingFormat {
private final Codec codec = TestUtil.alwaysPostingsFormat(new STUniformSplitRot13PostingsFormat());
@Override @Override
protected Codec getCodec() { protected PostingsFormat getPostingsFormat() {
return codec; return checkEncoding ? new STUniformSplitRot13PostingsFormat()
: new STUniformSplitPostingsFormat(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
null, null, random().nextBoolean());
} }
} }

View File

@ -41,13 +41,15 @@ public class UniformSplitRot13PostingsFormat extends PostingsFormat {
public static volatile boolean decoderCalled; public static volatile boolean decoderCalled;
public static volatile boolean blocksEncoded; public static volatile boolean blocksEncoded;
public static volatile boolean dictionaryEncoded; public static volatile boolean dictionaryEncoded;
protected final boolean dictionaryOnHeap;
public UniformSplitRot13PostingsFormat() { public UniformSplitRot13PostingsFormat() {
this("UniformSplitRot13"); this("UniformSplitRot13", false);
} }
protected UniformSplitRot13PostingsFormat(String name) { protected UniformSplitRot13PostingsFormat(String name, boolean dictionaryOnHeap) {
super(name); super(name);
this.dictionaryOnHeap = dictionaryOnHeap;
} }
public static void resetEncodingFlags() { public static void resetEncodingFlags() {
@ -135,7 +137,7 @@ public class UniformSplitRot13PostingsFormat extends PostingsFormat {
} }
protected FieldsProducer createFieldsProducer(SegmentReadState segmentReadState, PostingsReaderBase postingsReader) throws IOException { protected FieldsProducer createFieldsProducer(SegmentReadState segmentReadState, PostingsReaderBase postingsReader) throws IOException {
return new UniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder()); return new UniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder(), dictionaryOnHeap);
} }
protected BlockDecoder getBlockDecoder() { protected BlockDecoder getBlockDecoder() {

View File

@ -35,7 +35,7 @@ import org.apache.lucene.index.SegmentWriteState;
public class STUniformSplitRot13PostingsFormat extends UniformSplitRot13PostingsFormat { public class STUniformSplitRot13PostingsFormat extends UniformSplitRot13PostingsFormat {
public STUniformSplitRot13PostingsFormat() { public STUniformSplitRot13PostingsFormat() {
super("STUniformSplitRot13"); super("STUniformSplitRot13", false);
} }
protected FieldsConsumer createFieldsConsumer(SegmentWriteState segmentWriteState, PostingsWriterBase postingsWriter) throws IOException { protected FieldsConsumer createFieldsConsumer(SegmentWriteState segmentWriteState, PostingsWriterBase postingsWriter) throws IOException {
@ -54,6 +54,6 @@ public class STUniformSplitRot13PostingsFormat extends UniformSplitRot13Postings
} }
protected FieldsProducer createFieldsProducer(SegmentReadState segmentReadState, PostingsReaderBase postingsReader) throws IOException { protected FieldsProducer createFieldsProducer(SegmentReadState segmentReadState, PostingsReaderBase postingsReader) throws IOException {
return new STUniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder()); return new STUniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder(), dictionaryOnHeap);
} }
} }