LUCENE-9106: UniformSplit postings format allows extension of block/line serializers.

Bruno Roustant 2019-12-31 11:27:53 +01:00
parent 6eff727590
commit a97271fc52
15 changed files with 217 additions and 146 deletions

File: CHANGES.txt

@@ -28,6 +28,8 @@ Improvements
 * LUCENE-9105: UniformSplit postings format detects corrupted index and better handles IO exceptions. (Bruno Roustant)
 
+* LUCENE-9106: UniformSplit postings format allows extension of block/line serializers. (Bruno Roustant)
+
 Optimizations
 ---------------------
 (No changes)
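In short: BlockHeader and FieldMetadata gain nested Serializer classes, BlockLine.Serializer's static methods become overridable instance methods, BlockReader/BlockWriter expose protected createBlockHeaderSerializer()/createBlockLineSerializer()/createDeltaBaseTermStateSerializer() factories, and the terms reader/writer constructors accept a FieldMetadata.Serializer. Illustrative sketches of how these hooks can be used appear after the relevant files below.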

File: BlockHeader.java

@@ -134,43 +134,49 @@ public class BlockHeader implements Accountable {
     return basePayloadsFP;
   }
 
-  public void write(DataOutput output) throws IOException {
-    assert linesCount > 0 : "block header does not seem to be initialized";
-    output.writeVInt(linesCount);
-    output.writeVLong(baseDocsFP);
-    output.writeVLong(basePositionsFP);
-    output.writeVLong(basePayloadsFP);
-    output.writeVInt(termStatesBaseOffset);
-    output.writeVInt(middleLineOffset);
-  }
-
-  public static BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
-    int linesCount = input.readVInt();
-    if (linesCount <= 0 || linesCount > UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES) {
-      throw new CorruptIndexException("Illegal number of lines in a block: " + linesCount, input);
-    }
-    long baseDocsFP = input.readVLong();
-    long basePositionsFP = input.readVLong();
-    long basePayloadsFP = input.readVLong();
-    int termStatesBaseOffset = input.readVInt();
-    if (termStatesBaseOffset < 0) {
-      throw new CorruptIndexException("Illegal termStatesBaseOffset= " + termStatesBaseOffset, input);
-    }
-    int middleTermOffset = input.readVInt();
-    if (middleTermOffset < 0) {
-      throw new CorruptIndexException("Illegal middleTermOffset= " + middleTermOffset, input);
-    }
-    BlockHeader blockHeader = reuse == null ? new BlockHeader() : reuse;
-    return blockHeader.reset(linesCount, baseDocsFP, basePositionsFP, basePayloadsFP, termStatesBaseOffset, middleTermOffset);
-  }
-
   @Override
   public long ramBytesUsed() {
     return RAM_USAGE;
   }
+
+  /**
+   * Reads/writes block header.
+   */
+  public static class Serializer {
+
+    public void write(DataOutput output, BlockHeader blockHeader) throws IOException {
+      assert blockHeader.linesCount > 0 : "Block header is not initialized";
+      output.writeVInt(blockHeader.linesCount);
+      output.writeVLong(blockHeader.baseDocsFP);
+      output.writeVLong(blockHeader.basePositionsFP);
+      output.writeVLong(blockHeader.basePayloadsFP);
+      output.writeVInt(blockHeader.termStatesBaseOffset);
+      output.writeVInt(blockHeader.middleLineOffset);
+    }
+
+    public BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
+      int linesCount = input.readVInt();
+      if (linesCount <= 0 || linesCount > UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES) {
+        throw new CorruptIndexException("Illegal number of lines in a block: " + linesCount, input);
+      }
+      long baseDocsFP = input.readVLong();
+      long basePositionsFP = input.readVLong();
+      long basePayloadsFP = input.readVLong();
+      int termStatesBaseOffset = input.readVInt();
+      if (termStatesBaseOffset < 0) {
+        throw new CorruptIndexException("Illegal termStatesBaseOffset= " + termStatesBaseOffset, input);
+      }
+      int middleTermOffset = input.readVInt();
+      if (middleTermOffset < 0) {
+        throw new CorruptIndexException("Illegal middleTermOffset= " + middleTermOffset, input);
+      }
+      BlockHeader blockHeader = reuse == null ? new BlockHeader() : reuse;
+      return blockHeader.reset(linesCount, baseDocsFP, basePositionsFP, basePayloadsFP, termStatesBaseOffset, middleTermOffset);
+    }
+  }
 }
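To illustrate the new hook, a subclass can reuse the default wire format and append its own data. The sketch below is hypothetical (the class name and the extra vlong are not part of this commit); a BlockReader/BlockWriter subclass would return it from the createBlockHeaderSerializer() factory methods introduced further down.

import java.io.IOException;
import org.apache.lucene.codecs.uniformsplit.BlockHeader;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

/**
 * Hypothetical example: delegates to the default serializer, then appends
 * one extra vlong after the standard header fields.
 */
public class MyBlockHeaderSerializer extends BlockHeader.Serializer {

  @Override
  public void write(DataOutput output, BlockHeader blockHeader) throws IOException {
    super.write(output, blockHeader); // standard header fields first
    output.writeVLong(1L); // hypothetical extra field; read() must consume it
  }

  @Override
  public BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
    BlockHeader blockHeader = super.read(input, reuse);
    input.readVLong(); // consume the extra field written above
    return blockHeader;
  }
}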

File: BlockLine.java

@@ -107,7 +107,7 @@ public class BlockLine implements Accountable {
   }
 
   /**
-   * Reads block lines with terms encoded incrementally inside a block.
+   * Reads/writes block lines with terms encoded incrementally inside a block.
    * This class keeps a state of the previous term read to decode the next term.
    */
   public static class Serializer implements Accountable {
@@ -149,7 +149,7 @@ public class BlockLine implements Accountable {
    *          the incremental encoding. {@code true} for the first
    *          and middle term, {@code false} for other terms.
    */
-  public static void writeLine(DataOutput blockOutput, BlockLine line, BlockLine previousLine,
+  public void writeLine(DataOutput blockOutput, BlockLine line, BlockLine previousLine,
                     int termStateRelativeOffset, boolean isIncrementalEncodingSeed) throws IOException {
     blockOutput.writeVInt(termStateRelativeOffset);
     writeIncrementallyEncodedTerm(line.getTermBytes(), previousLine == null ? null : previousLine.getTermBytes(),
@@ -161,13 +161,13 @@ public class BlockLine implements Accountable {
    *
    * @param termStatesOutput The output pointing to the details region.
    */
-  protected static void writeLineTermState(DataOutput termStatesOutput, BlockLine line,
+  protected void writeLineTermState(DataOutput termStatesOutput, BlockLine line,
                     FieldInfo fieldInfo, DeltaBaseTermStateSerializer encoder) throws IOException {
     assert line.termState != null;
     encoder.writeTermState(termStatesOutput, fieldInfo, line.termState);
   }
 
-  protected static void writeIncrementallyEncodedTerm(TermBytes termBytes, TermBytes previousTermBytes,
+  protected void writeIncrementallyEncodedTerm(TermBytes termBytes, TermBytes previousTermBytes,
                     boolean isIncrementalEncodingSeed, DataOutput blockOutput) throws IOException {
     BytesRef term = termBytes.getTerm();
     assert term.offset == 0;
@@ -240,7 +240,7 @@ public class BlockLine implements Accountable {
    * Reads {@code length} bytes from the given {@link DataInput} and stores
    * them at {@code offset} in {@code bytes.bytes}.
    */
-  protected static void readBytes(DataInput input, BytesRef bytes, int offset, int length) throws IOException {
+  protected void readBytes(DataInput input, BytesRef bytes, int offset, int length) throws IOException {
     assert bytes.offset == 0;
     bytes.length = offset + length;
     bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length);

File: BlockReader.java

@@ -60,6 +60,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
   protected final FieldMetadata fieldMetadata;
   protected final BlockDecoder blockDecoder;
 
+  protected BlockHeader.Serializer blockHeaderReader;
   protected BlockLine.Serializer blockLineReader;
 
   /**
    * In-memory read buffer for the current block.
@@ -406,14 +407,27 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
   protected void initializeBlockReadLazily() throws IOException {
     if (blockStartFP == -1) {
       blockInput = blockInput.clone();
-      blockLineReader = new BlockLine.Serializer();
+      blockHeaderReader = createBlockHeaderSerializer();
+      blockLineReader = createBlockLineSerializer();
       blockReadBuffer = new ByteArrayDataInput();
       termStatesReadBuffer = new ByteArrayDataInput();
-      termStateSerializer = new DeltaBaseTermStateSerializer();
+      termStateSerializer = createDeltaBaseTermStateSerializer();
       scratchBlockBytes = new BytesRef();
     }
   }
 
+  protected BlockHeader.Serializer createBlockHeaderSerializer() {
+    return new BlockHeader.Serializer();
+  }
+
+  protected BlockLine.Serializer createBlockLineSerializer() {
+    return new BlockLine.Serializer();
+  }
+
+  protected DeltaBaseTermStateSerializer createDeltaBaseTermStateSerializer() {
+    return new DeltaBaseTermStateSerializer();
+  }
+
   /**
    * Reads the block header.
    * Sets {@link #blockHeader}.
@@ -428,7 +442,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
     BytesRef blockBytesRef = decodeBlockBytesIfNeeded(numBlockBytes);
     blockReadBuffer.reset(blockBytesRef.bytes, blockBytesRef.offset, blockBytesRef.length);
     termStatesReadBuffer.reset(blockBytesRef.bytes, blockBytesRef.offset, blockBytesRef.length);
-    return blockHeader = BlockHeader.read(blockReadBuffer, blockHeader);
+    return blockHeader = blockHeaderReader.read(blockReadBuffer, blockHeader);
   }
 
   protected BytesRef decodeBlockBytesIfNeeded(int numBlockBytes) throws IOException {

File: BlockWriter.java

@@ -60,6 +60,8 @@ public class BlockWriter {
   protected final ByteBuffersDataOutput blockLinesWriteBuffer;
   protected final ByteBuffersDataOutput termStatesWriteBuffer;
 
+  protected final BlockHeader.Serializer blockHeaderWriter;
+  protected final BlockLine.Serializer blockLineWriter;
   protected final DeltaBaseTermStateSerializer termStateSerializer;
   protected final BlockEncoder blockEncoder;
   protected final ByteBuffersDataOutput blockWriteBuffer;
@@ -81,7 +83,9 @@ public class BlockWriter {
     this.blockEncoder = blockEncoder;
     this.blockLines = new ArrayList<>(targetNumBlockLines);
-    this.termStateSerializer = new DeltaBaseTermStateSerializer();
+    this.blockHeaderWriter = createBlockHeaderSerializer();
+    this.blockLineWriter = createBlockLineSerializer();
+    this.termStateSerializer = createDeltaBaseTermStateSerializer();
     this.blockLinesWriteBuffer = ByteBuffersDataOutput.newResettableInstance();
     this.termStatesWriteBuffer = ByteBuffersDataOutput.newResettableInstance();
@@ -91,6 +95,18 @@ public class BlockWriter {
     this.scratchBytesRef = new BytesRef();
   }
 
+  protected BlockHeader.Serializer createBlockHeaderSerializer() {
+    return new BlockHeader.Serializer();
+  }
+
+  protected BlockLine.Serializer createBlockLineSerializer() {
+    return new BlockLine.Serializer();
+  }
+
+  protected DeltaBaseTermStateSerializer createDeltaBaseTermStateSerializer() {
+    return new DeltaBaseTermStateSerializer();
+  }
+
   /**
    * Adds a new {@link BlockLine} term for the current field.
    * <p>
@@ -196,7 +212,7 @@ public class BlockWriter {
     reusableBlockHeader.reset(blockLines.size(), termStateSerializer.getBaseDocStartFP(), termStateSerializer.getBasePosStartFP(),
         termStateSerializer.getBasePayStartFP(), Math.toIntExact(blockLinesWriteBuffer.size()), middleOffset);
-    reusableBlockHeader.write(blockWriteBuffer);
+    blockHeaderWriter.write(blockWriteBuffer, reusableBlockHeader);
     blockLinesWriteBuffer.copyTo(blockWriteBuffer);
     termStatesWriteBuffer.copyTo(blockWriteBuffer);
@@ -236,8 +252,8 @@ public class BlockWriter {
   protected void writeBlockLine(boolean isIncrementalEncodingSeed, BlockLine line, BlockLine previousLine) throws IOException {
     assert fieldMetadata != null;
-    BlockLine.Serializer.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
-    BlockLine.Serializer.writeLineTermState(termStatesWriteBuffer, line, fieldMetadata.getFieldInfo(), termStateSerializer);
+    blockLineWriter.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
+    blockLineWriter.writeLineTermState(termStatesWriteBuffer, line, fieldMetadata.getFieldInfo(), termStateSerializer);
   }
 
   /**

File: FieldMetadata.java

@@ -194,88 +194,99 @@ public class FieldMetadata implements Accountable {
         + (docsSeen == null ? 0 : docsSeen.ramBytesUsed());
   }
 
-  public static FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
-    int fieldId = input.readVInt();
-    FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldId);
-    if (fieldInfo == null) {
-      throw new CorruptIndexException("Illegal field id= " + fieldId, input);
-    }
-    FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 0, false);
-
-    fieldMetadata.numTerms = input.readVInt();
-    if (fieldMetadata.numTerms <= 0) {
-      throw new CorruptIndexException("Illegal number of terms= " + fieldMetadata.numTerms + " for field= " + fieldId, input);
-    }
-
-    fieldMetadata.sumDocFreq = input.readVInt();
-    fieldMetadata.sumTotalTermFreq = fieldMetadata.sumDocFreq;
-    if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
-      fieldMetadata.sumTotalTermFreq += input.readVInt();
-      if (fieldMetadata.sumTotalTermFreq < fieldMetadata.sumDocFreq) {
-        // #positions must be >= #postings.
-        throw new CorruptIndexException("Illegal sumTotalTermFreq= " + fieldMetadata.sumTotalTermFreq
-            + " sumDocFreq= " + fieldMetadata.sumDocFreq + " for field= " + fieldId, input);
-      }
-    }
-
-    fieldMetadata.docCount = input.readVInt();
-    if (fieldMetadata.docCount < 0 || fieldMetadata.docCount > maxNumDocs) {
-      // #docs with field must be <= #docs.
-      throw new CorruptIndexException("Illegal number of docs= " + fieldMetadata.docCount
-          + " maxNumDocs= " + maxNumDocs + " for field=" + fieldId, input);
-    }
-    if (fieldMetadata.sumDocFreq < fieldMetadata.docCount) {
-      // #postings must be >= #docs with field.
-      throw new CorruptIndexException("Illegal sumDocFreq= " + fieldMetadata.sumDocFreq
-          + " docCount= " + fieldMetadata.docCount + " for field= " + fieldId, input);
-    }
-
-    fieldMetadata.dictionaryStartFP = input.readVLong();
-    fieldMetadata.firstBlockStartFP = input.readVLong();
-    fieldMetadata.lastBlockStartFP = input.readVLong();
-
-    int lastTermLength = input.readVInt();
-    BytesRef lastTerm = new BytesRef(lastTermLength);
-    if (lastTermLength > 0) {
-      input.readBytes(lastTerm.bytes, 0, lastTermLength);
-      lastTerm.length = lastTermLength;
-    } else if (lastTermLength < 0) {
-      throw new CorruptIndexException("Illegal last term length= " + lastTermLength + " for field= " + fieldId, input);
-    }
-    fieldMetadata.setLastTerm(lastTerm);
-
-    return fieldMetadata;
-  }
-
-  public void write(DataOutput output) throws IOException {
-    assert dictionaryStartFP >= 0;
-    assert firstBlockStartFP >= 0;
-    assert lastBlockStartFP >= 0;
-    assert numTerms > 0 : "There should be at least one term for field " + fieldInfo.name + ": " + numTerms;
-    assert firstBlockStartFP <= lastBlockStartFP : "start: " + firstBlockStartFP + " end: " + lastBlockStartFP;
-    assert lastTerm != null : "you must set the last term";
-
-    output.writeVInt(fieldInfo.number);
-    output.writeVInt(numTerms);
-    output.writeVInt(sumDocFreq);
-    if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
-      assert sumTotalTermFreq >= sumDocFreq : "sumTotalFQ: " + sumTotalTermFreq + " sumDocFQ: " + sumDocFreq;
-      output.writeVInt(sumTotalTermFreq - sumDocFreq);
-    }
-    output.writeVInt(getDocCount());
-    output.writeVLong(dictionaryStartFP);
-    output.writeVLong(firstBlockStartFP);
-    output.writeVLong(lastBlockStartFP);
-    if (lastTerm.length > 0) {
-      output.writeVInt(lastTerm.length);
-      output.writeBytes(lastTerm.bytes, lastTerm.offset, lastTerm.length);
-    } else {
-      output.writeVInt(0);
-    }
-  }
+  /**
+   * Reads/writes field metadata.
+   */
+  public static class Serializer {
+
+    /**
+     * Stateless singleton.
+     */
+    public static final Serializer INSTANCE = new Serializer();
+
+    public void write(DataOutput output, FieldMetadata fieldMetadata) throws IOException {
+      assert fieldMetadata.dictionaryStartFP >= 0;
+      assert fieldMetadata.firstBlockStartFP >= 0;
+      assert fieldMetadata.lastBlockStartFP >= 0;
+      assert fieldMetadata.numTerms > 0 : "There should be at least one term for field " + fieldMetadata.fieldInfo.name + ": " + fieldMetadata.numTerms;
+      assert fieldMetadata.firstBlockStartFP <= fieldMetadata.lastBlockStartFP : "start: " + fieldMetadata.firstBlockStartFP + " end: " + fieldMetadata.lastBlockStartFP;
+      assert fieldMetadata.lastTerm != null : "you must set the last term";
+
+      output.writeVInt(fieldMetadata.fieldInfo.number);
+      output.writeVInt(fieldMetadata.numTerms);
+      output.writeVInt(fieldMetadata.sumDocFreq);
+      if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
+        assert fieldMetadata.sumTotalTermFreq >= fieldMetadata.sumDocFreq : "sumTotalFQ: " + fieldMetadata.sumTotalTermFreq + " sumDocFQ: " + fieldMetadata.sumDocFreq;
+        output.writeVInt(fieldMetadata.sumTotalTermFreq - fieldMetadata.sumDocFreq);
+      }
+      output.writeVInt(fieldMetadata.getDocCount());
+      output.writeVLong(fieldMetadata.dictionaryStartFP);
+      output.writeVLong(fieldMetadata.firstBlockStartFP);
+      output.writeVLong(fieldMetadata.lastBlockStartFP);
+      if (fieldMetadata.lastTerm.length > 0) {
+        output.writeVInt(fieldMetadata.lastTerm.length);
+        output.writeBytes(fieldMetadata.lastTerm.bytes, fieldMetadata.lastTerm.offset, fieldMetadata.lastTerm.length);
+      } else {
+        output.writeVInt(0);
+      }
+    }
+
+    public FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
+      int fieldId = input.readVInt();
+      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldId);
+      if (fieldInfo == null) {
+        throw new CorruptIndexException("Illegal field id= " + fieldId, input);
+      }
+      FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 0, false);
+
+      fieldMetadata.numTerms = input.readVInt();
+      if (fieldMetadata.numTerms <= 0) {
+        throw new CorruptIndexException("Illegal number of terms= " + fieldMetadata.numTerms + " for field= " + fieldId, input);
+      }
+
+      fieldMetadata.sumDocFreq = input.readVInt();
+      fieldMetadata.sumTotalTermFreq = fieldMetadata.sumDocFreq;
+      if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
+        fieldMetadata.sumTotalTermFreq += input.readVInt();
+        if (fieldMetadata.sumTotalTermFreq < fieldMetadata.sumDocFreq) {
+          // #positions must be >= #postings.
+          throw new CorruptIndexException("Illegal sumTotalTermFreq= " + fieldMetadata.sumTotalTermFreq
+              + " sumDocFreq= " + fieldMetadata.sumDocFreq + " for field= " + fieldId, input);
+        }
+      }
+
+      fieldMetadata.docCount = input.readVInt();
+      if (fieldMetadata.docCount < 0 || fieldMetadata.docCount > maxNumDocs) {
+        // #docs with field must be <= #docs.
+        throw new CorruptIndexException("Illegal number of docs= " + fieldMetadata.docCount
+            + " maxNumDocs= " + maxNumDocs + " for field=" + fieldId, input);
+      }
+      if (fieldMetadata.sumDocFreq < fieldMetadata.docCount) {
+        // #postings must be >= #docs with field.
+        throw new CorruptIndexException("Illegal sumDocFreq= " + fieldMetadata.sumDocFreq
+            + " docCount= " + fieldMetadata.docCount + " for field= " + fieldId, input);
+      }
+
+      fieldMetadata.dictionaryStartFP = input.readVLong();
+      fieldMetadata.firstBlockStartFP = input.readVLong();
+      fieldMetadata.lastBlockStartFP = input.readVLong();
+
+      int lastTermLength = input.readVInt();
+      BytesRef lastTerm = new BytesRef(lastTermLength);
+      if (lastTermLength > 0) {
+        input.readBytes(lastTerm.bytes, 0, lastTermLength);
+        lastTerm.length = lastTermLength;
+      } else if (lastTermLength < 0) {
+        throw new CorruptIndexException("Illegal last term length= " + lastTermLength + " for field= " + fieldId, input);
+      }
+      fieldMetadata.setLastTerm(lastTerm);
+
+      return fieldMetadata;
+    }
+  }
 }
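The same delegation pattern works for field metadata: anything accepting the new FieldMetadata.Serializer parameter can take a subclass like this hypothetical sketch (the class name and the extra statistic are illustrative, not part of the commit).

import java.io.IOException;
import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

/**
 * Hypothetical example: writes the default per-field metadata, then one
 * extra vlong; read() consumes it so the stream stays aligned.
 */
public class MyFieldMetadataSerializer extends FieldMetadata.Serializer {

  @Override
  public void write(DataOutput output, FieldMetadata fieldMetadata) throws IOException {
    super.write(output, fieldMetadata);
    output.writeVLong(0L); // hypothetical extra per-field statistic
  }

  @Override
  public FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
    FieldMetadata fieldMetadata = super.read(input, fieldInfos, maxNumDocs);
    input.readVLong(); // consume the extra statistic written above
    return fieldMetadata;
  }
}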

File: UniformSplitTermsReader.java

@@ -69,7 +69,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
    * It can be used for decompression or decryption.
    */
   public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
-    this(postingsReader, state, blockDecoder, NAME, VERSION_START, VERSION_CURRENT,
+    this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT,
         TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
   }
@@ -77,8 +77,10 @@ public class UniformSplitTermsReader extends FieldsProducer {
    * @param blockDecoder Optional block decoder, may be null if none.
    *                     It can be used for decompression or decryption.
    */
-  protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
-                    String codecName, int versionStart, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
+  protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
+                    BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
+                    String codecName, int versionStart, int versionCurrent,
+                    String termsBlocksExtension, String dictionaryExtension) throws IOException {
     IndexInput dictionaryInput = null;
     IndexInput blockInput = null;
     boolean success = false;
@@ -100,7 +102,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
       CodecUtil.retrieveChecksum(blockInput);
       seekFieldsMetadata(blockInput);
-      Collection<FieldMetadata> fieldMetadataCollection = parseFieldsMetadata(blockInput, state.fieldInfos, state.segmentInfo.maxDoc());
+      Collection<FieldMetadata> fieldMetadataCollection = parseFieldsMetadata(blockInput, state.fieldInfos, fieldMetadataReader, state.segmentInfo.maxDoc());
       fieldToTermsMap = new HashMap<>();
       this.blockInput = blockInput;
@@ -133,19 +135,19 @@ public class UniformSplitTermsReader extends FieldsProducer {
    * @param indexInput {@link IndexInput} must be positioned to the fields metadata
    *                   details by calling {@link #seekFieldsMetadata(IndexInput)} before this call.
    */
-  protected static Collection<FieldMetadata> parseFieldsMetadata(IndexInput indexInput, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
+  protected static Collection<FieldMetadata> parseFieldsMetadata(IndexInput indexInput, FieldInfos fieldInfos,
+                    FieldMetadata.Serializer fieldMetadataReader, int maxNumDocs) throws IOException {
     int numFields = indexInput.readVInt();
     if (numFields < 0) {
       throw new CorruptIndexException("Illegal number of fields= " + numFields, indexInput);
     }
     Collection<FieldMetadata> fieldMetadataCollection = new ArrayList<>(numFields);
     for (int i = 0; i < numFields; i++) {
-      fieldMetadataCollection.add(FieldMetadata.read(indexInput, fieldInfos, maxNumDocs));
+      fieldMetadataCollection.add(fieldMetadataReader.read(indexInput, fieldInfos, maxNumDocs));
     }
     return fieldMetadataCollection;
   }
 
   @Override
   public void close() throws IOException {
     try {
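A reader subclass can pass a custom serializer through the new protected constructor. A minimal hypothetical sketch, assuming the NAME/VERSION/extension constants are statically imported from UniformSplitPostingsFormat as they appear to be in this file:

import java.io.IOException;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.uniformsplit.BlockDecoder;
import org.apache.lucene.codecs.uniformsplit.UniformSplitTermsReader;
import org.apache.lucene.index.SegmentReadState;

// Assumed location of the NAME/VERSION/extension constants.
import static org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat.*;

public class MyUniformSplitTermsReader extends UniformSplitTermsReader {

  public MyUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
                                   BlockDecoder blockDecoder) throws IOException {
    // Custom serializer instead of the FieldMetadata.Serializer.INSTANCE default.
    super(postingsReader, state, blockDecoder, new MyFieldMetadataSerializer(),
        NAME, VERSION_START, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
  }
}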

File: UniformSplitTermsWriter.java

@@ -128,6 +128,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
   protected final int deltaNumLines;
   protected final BlockEncoder blockEncoder;
+  protected final FieldMetadata.Serializer fieldMetadataWriter;
   protected final IndexOutput blockOutput;
   protected final IndexOutput dictionaryOutput;
@@ -146,7 +147,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
    */
   public UniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
                     int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException {
-    this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder,
+    this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, FieldMetadata.Serializer.INSTANCE,
         NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
   }
@@ -164,7 +165,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
    *                     It can be used for compression or encryption.
    */
   protected UniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
-                    int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
+                    int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, FieldMetadata.Serializer fieldMetadataWriter,
                     String codecName, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
     validateSettings(targetNumBlockLines, deltaNumLines);
     IndexOutput blockOutput = null;
@@ -177,6 +178,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
     this.targetNumBlockLines = targetNumBlockLines;
     this.deltaNumLines = deltaNumLines;
     this.blockEncoder = blockEncoder;
+    this.fieldMetadataWriter = fieldMetadataWriter;
     String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, termsBlocksExtension);
     blockOutput = state.directory.createOutput(termsName, state.context);
@@ -278,7 +280,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
     if (fieldMetadata.getNumTerms() > 0) {
       fieldMetadata.setLastTerm(lastTerm);
-      fieldMetadata.write(fieldsOutput);
+      fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
       writeDictionary(dictionaryBuilder);
       return 1;
     }
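The writer side mirrors the reader, under the same assumptions (hypothetical class names, constants statically imported from UniformSplitPostingsFormat). The serializer passed here must match the one used by the reader so the extra data is written and consumed symmetrically.

import java.io.IOException;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.uniformsplit.BlockEncoder;
import org.apache.lucene.codecs.uniformsplit.UniformSplitTermsWriter;
import org.apache.lucene.index.SegmentWriteState;

import static org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat.*;

public class MyUniformSplitTermsWriter extends UniformSplitTermsWriter {

  public MyUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
                                   int targetNumBlockLines, int deltaNumLines,
                                   BlockEncoder blockEncoder) throws IOException {
    super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder,
        new MyFieldMetadataSerializer(), // must match the serializer used on the read side
        NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
  }
}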

File: STBlockLine.java

@@ -75,7 +75,7 @@ public class STBlockLine extends BlockLine {
   /**
    * Writes all the {@link BlockTermState} of the provided {@link STBlockLine} to the given output.
    */
-  public static void writeLineTermStates(DataOutput termStatesOutput, STBlockLine line,
+  public void writeLineTermStates(DataOutput termStatesOutput, STBlockLine line,
                     DeltaBaseTermStateSerializer encoder) throws IOException {
 
     FieldMetadataTermState fieldMetadataTermState;
@@ -111,7 +111,7 @@ public class STBlockLine extends BlockLine {
    * @return The {@link BlockTermState} corresponding to the provided field id; or null if the field
    *         does not occur in the line.
    */
-  public static BlockTermState readTermStateForField(int fieldId, DataInput termStatesInput,
+  public BlockTermState readTermStateForField(int fieldId, DataInput termStatesInput,
                     DeltaBaseTermStateSerializer termStateSerializer,
                     BlockHeader blockHeader, FieldInfos fieldInfos,
                     BlockTermState reuse) throws IOException {
@@ -161,7 +161,7 @@ public class STBlockLine extends BlockLine {
    * @param fieldTermStatesMap Map filled with the term states for each field. It is cleared first.
    * @see #readTermStateForField
    */
-  public static void readFieldTermStatesMap(DataInput termStatesInput,
+  public void readFieldTermStatesMap(DataInput termStatesInput,
                     DeltaBaseTermStateSerializer termStateSerializer,
                     BlockHeader blockHeader,
                     FieldInfos fieldInfos,
@@ -183,7 +183,7 @@ public class STBlockLine extends BlockLine {
   /**
    * Reads all the field ids in the current block line of the provided input.
    */
-  public static int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException {
+  public int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException {
     int[] fieldIds = new int[numFields];
     for (int i = 0; i < numFields; i++) {
       fieldIds[i] = termStatesInput.readVInt();

File: STBlockReader.java

@@ -117,6 +117,11 @@ public class STBlockReader extends BlockReader {
     return blockStartFP > fieldMetadata.getLastBlockStartFP() || super.isBeyondLastTerm(searchedTerm, blockStartFP);
   }
 
+  @Override
+  protected STBlockLine.Serializer createBlockLineSerializer() {
+    return new STBlockLine.Serializer();
+  }
+
   /**
    * Reads the {@link BlockTermState} on the current line for this reader's field.
    *
@@ -125,7 +130,7 @@ public class STBlockReader extends BlockReader {
   @Override
   protected BlockTermState readTermState() throws IOException {
     termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
-    return termState = STBlockLine.Serializer.readTermStateForField(
+    return termState = ((STBlockLine.Serializer) blockLineReader).readTermStateForField(
         fieldMetadata.getFieldInfo().number,
         termStatesReadBuffer,
         termStateSerializer,

File: STBlockWriter.java

@@ -84,10 +84,15 @@ public class STBlockWriter extends BlockWriter {
     super.finishLastBlock(dictionaryBuilder);
   }
 
+  @Override
+  protected BlockLine.Serializer createBlockLineSerializer() {
+    return new STBlockLine.Serializer();
+  }
+
   @Override
   protected void writeBlockLine(boolean isIncrementalEncodingSeed, BlockLine line, BlockLine previousLine) throws IOException {
-    STBlockLine.Serializer.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
-    STBlockLine.Serializer.writeLineTermStates(termStatesWriteBuffer, (STBlockLine) line, termStateSerializer);
+    blockLineWriter.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
+    ((STBlockLine.Serializer) blockLineWriter).writeLineTermStates(termStatesWriteBuffer, (STBlockLine) line, termStateSerializer);
     ((STBlockLine) line).collectFields(fieldsInBlock);
   }

File: STIntersectBlockReader.java

@@ -91,6 +91,11 @@ public class STIntersectBlockReader extends IntersectBlockReader {
     return super.nextBlockMatchingPrefix() && blockHeader != null;
   }
 
+  @Override
+  protected STBlockLine.Serializer createBlockLineSerializer() {
+    return new STBlockLine.Serializer();
+  }
+
   /**
    * Reads the {@link BlockTermState} on the current line for the specific field
    * corresponding to this reader.
@@ -100,7 +105,7 @@ public class STIntersectBlockReader extends IntersectBlockReader {
   @Override
   protected BlockTermState readTermState() throws IOException {
     termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
-    return STBlockLine.Serializer.readTermStateForField(
+    return ((STBlockLine.Serializer) blockLineReader).readTermStateForField(
         fieldMetadata.getFieldInfo().number,
         termStatesReadBuffer,
         termStateSerializer,

File: STMergingBlockReader.java

@@ -98,7 +98,7 @@ public class STMergingBlockReader extends STBlockReader {
   public void readFieldTermStatesMap(Map<String, BlockTermState> fieldTermStatesMap) throws IOException {
     if (term() != null) {
       termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
-      STBlockLine.Serializer.readFieldTermStatesMap(
+      ((STBlockLine.Serializer) blockLineReader).readFieldTermStatesMap(
           termStatesReadBuffer,
           termStateSerializer,
           blockHeader,

File: STUniformSplitTermsReader.java

@@ -47,13 +47,15 @@ import static org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPo
 public class STUniformSplitTermsReader extends UniformSplitTermsReader {
 
   public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
-    super(postingsReader, state, blockDecoder, NAME, VERSION_START,
-        VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
+    this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE,
+        NAME, VERSION_START, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
   }
 
-  protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
-                    String codecName, int versionStart, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
-    super(postingsReader, state, blockDecoder, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
+  protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
+                    BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
+                    String codecName, int versionStart, int versionCurrent,
+                    String termsBlocksExtension, String dictionaryExtension) throws IOException {
+    super(postingsReader, state, blockDecoder, fieldMetadataReader, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
   }
 
   @Override

File: STUniformSplitTermsWriter.java

@@ -88,13 +88,14 @@ public class STUniformSplitTermsWriter extends UniformSplitTermsWriter {
   public STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
                     int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException {
-    this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
+    this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, FieldMetadata.Serializer.INSTANCE,
+        NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
   }
 
   protected STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
-                    int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
+                    int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, FieldMetadata.Serializer fieldMetadataWriter,
                     String codecName, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
-    super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, codecName, versionCurrent, termsBlocksExtension, dictionaryExtension);
+    super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, fieldMetadataWriter, codecName, versionCurrent, termsBlocksExtension, dictionaryExtension);
   }
 
   @Override
@@ -200,7 +201,7 @@ public class STUniformSplitTermsWriter extends UniformSplitTermsWriter {
     int fieldsNumber = 0;
     for (FieldMetadata fieldMetadata : fieldMetadataList) {
       if (fieldMetadata.getNumTerms() > 0) {
-        fieldMetadata.write(fieldsOutput);
+        fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
         fieldsNumber++;
       }
     }