mirror of https://github.com/apache/lucene.git
LUCENE-9106: UniformSplit postings format allows extension of block/line serializers.
This commit is contained in:
parent
6eff727590
commit
a97271fc52
|
@ -28,6 +28,8 @@ Improvements
|
|||
|
||||
* LUCENE-9105: UniformSplit postings format detects corrupted index and better handles IO exceptions. (Bruno Roustant)
|
||||
|
||||
* LUCENE-9106: UniformSplit postings format allows extension of block/line serializers. (Bruno Roustant)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
(No changes)
|
||||
|
|
|
@ -134,43 +134,49 @@ public class BlockHeader implements Accountable {
|
|||
return basePayloadsFP;
|
||||
}
|
||||
|
||||
public void write(DataOutput output) throws IOException {
|
||||
assert linesCount > 0 : "block header does not seem to be initialized";
|
||||
output.writeVInt(linesCount);
|
||||
|
||||
output.writeVLong(baseDocsFP);
|
||||
output.writeVLong(basePositionsFP);
|
||||
output.writeVLong(basePayloadsFP);
|
||||
|
||||
output.writeVInt(termStatesBaseOffset);
|
||||
output.writeVInt(middleLineOffset);
|
||||
}
|
||||
|
||||
public static BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
|
||||
int linesCount = input.readVInt();
|
||||
if (linesCount <= 0 || linesCount > UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES) {
|
||||
throw new CorruptIndexException("Illegal number of lines in a block: " + linesCount, input);
|
||||
}
|
||||
|
||||
long baseDocsFP = input.readVLong();
|
||||
long basePositionsFP = input.readVLong();
|
||||
long basePayloadsFP = input.readVLong();
|
||||
|
||||
int termStatesBaseOffset = input.readVInt();
|
||||
if (termStatesBaseOffset < 0) {
|
||||
throw new CorruptIndexException("Illegal termStatesBaseOffset= " + termStatesBaseOffset, input);
|
||||
}
|
||||
int middleTermOffset = input.readVInt();
|
||||
if (middleTermOffset < 0) {
|
||||
throw new CorruptIndexException("Illegal middleTermOffset= " + middleTermOffset, input);
|
||||
}
|
||||
|
||||
BlockHeader blockHeader = reuse == null ? new BlockHeader() : reuse;
|
||||
return blockHeader.reset(linesCount, baseDocsFP, basePositionsFP, basePayloadsFP, termStatesBaseOffset, middleTermOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RAM_USAGE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads/writes block header.
|
||||
*/
|
||||
public static class Serializer {
|
||||
|
||||
public void write(DataOutput output, BlockHeader blockHeader) throws IOException {
|
||||
assert blockHeader.linesCount > 0 : "Block header is not initialized";
|
||||
output.writeVInt(blockHeader.linesCount);
|
||||
|
||||
output.writeVLong(blockHeader.baseDocsFP);
|
||||
output.writeVLong(blockHeader.basePositionsFP);
|
||||
output.writeVLong(blockHeader.basePayloadsFP);
|
||||
|
||||
output.writeVInt(blockHeader.termStatesBaseOffset);
|
||||
output.writeVInt(blockHeader.middleLineOffset);
|
||||
}
|
||||
|
||||
public BlockHeader read(DataInput input, BlockHeader reuse) throws IOException {
|
||||
int linesCount = input.readVInt();
|
||||
if (linesCount <= 0 || linesCount > UniformSplitTermsWriter.MAX_NUM_BLOCK_LINES) {
|
||||
throw new CorruptIndexException("Illegal number of lines in a block: " + linesCount, input);
|
||||
}
|
||||
|
||||
long baseDocsFP = input.readVLong();
|
||||
long basePositionsFP = input.readVLong();
|
||||
long basePayloadsFP = input.readVLong();
|
||||
|
||||
int termStatesBaseOffset = input.readVInt();
|
||||
if (termStatesBaseOffset < 0) {
|
||||
throw new CorruptIndexException("Illegal termStatesBaseOffset= " + termStatesBaseOffset, input);
|
||||
}
|
||||
int middleTermOffset = input.readVInt();
|
||||
if (middleTermOffset < 0) {
|
||||
throw new CorruptIndexException("Illegal middleTermOffset= " + middleTermOffset, input);
|
||||
}
|
||||
|
||||
BlockHeader blockHeader = reuse == null ? new BlockHeader() : reuse;
|
||||
return blockHeader.reset(linesCount, baseDocsFP, basePositionsFP, basePayloadsFP, termStatesBaseOffset, middleTermOffset);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -107,7 +107,7 @@ public class BlockLine implements Accountable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Reads block lines with terms encoded incrementally inside a block.
|
||||
* Reads/writes block lines with terms encoded incrementally inside a block.
|
||||
* This class keeps a state of the previous term read to decode the next term.
|
||||
*/
|
||||
public static class Serializer implements Accountable {
|
||||
|
@ -149,7 +149,7 @@ public class BlockLine implements Accountable {
|
|||
* the incremental encoding. {@code true} for the first
|
||||
* and middle term, {@code false} for other terms.
|
||||
*/
|
||||
public static void writeLine(DataOutput blockOutput, BlockLine line, BlockLine previousLine,
|
||||
public void writeLine(DataOutput blockOutput, BlockLine line, BlockLine previousLine,
|
||||
int termStateRelativeOffset, boolean isIncrementalEncodingSeed) throws IOException {
|
||||
blockOutput.writeVInt(termStateRelativeOffset);
|
||||
writeIncrementallyEncodedTerm(line.getTermBytes(), previousLine == null ? null : previousLine.getTermBytes(),
|
||||
|
@ -161,13 +161,13 @@ public class BlockLine implements Accountable {
|
|||
*
|
||||
* @param termStatesOutput The output pointing to the details region.
|
||||
*/
|
||||
protected static void writeLineTermState(DataOutput termStatesOutput, BlockLine line,
|
||||
protected void writeLineTermState(DataOutput termStatesOutput, BlockLine line,
|
||||
FieldInfo fieldInfo, DeltaBaseTermStateSerializer encoder) throws IOException {
|
||||
assert line.termState != null;
|
||||
encoder.writeTermState(termStatesOutput, fieldInfo, line.termState);
|
||||
}
|
||||
|
||||
protected static void writeIncrementallyEncodedTerm(TermBytes termBytes, TermBytes previousTermBytes,
|
||||
protected void writeIncrementallyEncodedTerm(TermBytes termBytes, TermBytes previousTermBytes,
|
||||
boolean isIncrementalEncodingSeed, DataOutput blockOutput) throws IOException {
|
||||
BytesRef term = termBytes.getTerm();
|
||||
assert term.offset == 0;
|
||||
|
@ -240,7 +240,7 @@ public class BlockLine implements Accountable {
|
|||
* Reads {@code length} bytes from the given {@link DataInput} and stores
|
||||
* them at {@code offset} in {@code bytes.bytes}.
|
||||
*/
|
||||
protected static void readBytes(DataInput input, BytesRef bytes, int offset, int length) throws IOException {
|
||||
protected void readBytes(DataInput input, BytesRef bytes, int offset, int length) throws IOException {
|
||||
assert bytes.offset == 0;
|
||||
bytes.length = offset + length;
|
||||
bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length);
|
||||
|
|
|
@ -60,6 +60,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
|
|||
protected final FieldMetadata fieldMetadata;
|
||||
protected final BlockDecoder blockDecoder;
|
||||
|
||||
protected BlockHeader.Serializer blockHeaderReader;
|
||||
protected BlockLine.Serializer blockLineReader;
|
||||
/**
|
||||
* In-memory read buffer for the current block.
|
||||
|
@ -406,14 +407,27 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
|
|||
protected void initializeBlockReadLazily() throws IOException {
|
||||
if (blockStartFP == -1) {
|
||||
blockInput = blockInput.clone();
|
||||
blockLineReader = new BlockLine.Serializer();
|
||||
blockHeaderReader = createBlockHeaderSerializer();
|
||||
blockLineReader = createBlockLineSerializer();
|
||||
blockReadBuffer = new ByteArrayDataInput();
|
||||
termStatesReadBuffer = new ByteArrayDataInput();
|
||||
termStateSerializer = new DeltaBaseTermStateSerializer();
|
||||
termStateSerializer = createDeltaBaseTermStateSerializer();
|
||||
scratchBlockBytes = new BytesRef();
|
||||
}
|
||||
}
|
||||
|
||||
protected BlockHeader.Serializer createBlockHeaderSerializer() {
|
||||
return new BlockHeader.Serializer();
|
||||
}
|
||||
|
||||
protected BlockLine.Serializer createBlockLineSerializer() {
|
||||
return new BlockLine.Serializer();
|
||||
}
|
||||
|
||||
protected DeltaBaseTermStateSerializer createDeltaBaseTermStateSerializer() {
|
||||
return new DeltaBaseTermStateSerializer();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the block header.
|
||||
* Sets {@link #blockHeader}.
|
||||
|
@ -428,7 +442,7 @@ public class BlockReader extends BaseTermsEnum implements Accountable {
|
|||
BytesRef blockBytesRef = decodeBlockBytesIfNeeded(numBlockBytes);
|
||||
blockReadBuffer.reset(blockBytesRef.bytes, blockBytesRef.offset, blockBytesRef.length);
|
||||
termStatesReadBuffer.reset(blockBytesRef.bytes, blockBytesRef.offset, blockBytesRef.length);
|
||||
return blockHeader = BlockHeader.read(blockReadBuffer, blockHeader);
|
||||
return blockHeader = blockHeaderReader.read(blockReadBuffer, blockHeader);
|
||||
}
|
||||
|
||||
protected BytesRef decodeBlockBytesIfNeeded(int numBlockBytes) throws IOException {
|
||||
|
|
|
@ -60,6 +60,8 @@ public class BlockWriter {
|
|||
protected final ByteBuffersDataOutput blockLinesWriteBuffer;
|
||||
protected final ByteBuffersDataOutput termStatesWriteBuffer;
|
||||
|
||||
protected final BlockHeader.Serializer blockHeaderWriter;
|
||||
protected final BlockLine.Serializer blockLineWriter;
|
||||
protected final DeltaBaseTermStateSerializer termStateSerializer;
|
||||
protected final BlockEncoder blockEncoder;
|
||||
protected final ByteBuffersDataOutput blockWriteBuffer;
|
||||
|
@ -81,7 +83,9 @@ public class BlockWriter {
|
|||
this.blockEncoder = blockEncoder;
|
||||
|
||||
this.blockLines = new ArrayList<>(targetNumBlockLines);
|
||||
this.termStateSerializer = new DeltaBaseTermStateSerializer();
|
||||
this.blockHeaderWriter = createBlockHeaderSerializer();
|
||||
this.blockLineWriter = createBlockLineSerializer();
|
||||
this.termStateSerializer = createDeltaBaseTermStateSerializer();
|
||||
|
||||
this.blockLinesWriteBuffer = ByteBuffersDataOutput.newResettableInstance();
|
||||
this.termStatesWriteBuffer = ByteBuffersDataOutput.newResettableInstance();
|
||||
|
@ -91,6 +95,18 @@ public class BlockWriter {
|
|||
this.scratchBytesRef = new BytesRef();
|
||||
}
|
||||
|
||||
protected BlockHeader.Serializer createBlockHeaderSerializer() {
|
||||
return new BlockHeader.Serializer();
|
||||
}
|
||||
|
||||
protected BlockLine.Serializer createBlockLineSerializer() {
|
||||
return new BlockLine.Serializer();
|
||||
}
|
||||
|
||||
protected DeltaBaseTermStateSerializer createDeltaBaseTermStateSerializer() {
|
||||
return new DeltaBaseTermStateSerializer();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a new {@link BlockLine} term for the current field.
|
||||
* <p>
|
||||
|
@ -196,7 +212,7 @@ public class BlockWriter {
|
|||
|
||||
reusableBlockHeader.reset(blockLines.size(), termStateSerializer.getBaseDocStartFP(), termStateSerializer.getBasePosStartFP(),
|
||||
termStateSerializer.getBasePayStartFP(), Math.toIntExact(blockLinesWriteBuffer.size()), middleOffset);
|
||||
reusableBlockHeader.write(blockWriteBuffer);
|
||||
blockHeaderWriter.write(blockWriteBuffer, reusableBlockHeader);
|
||||
|
||||
blockLinesWriteBuffer.copyTo(blockWriteBuffer);
|
||||
termStatesWriteBuffer.copyTo(blockWriteBuffer);
|
||||
|
@ -236,8 +252,8 @@ public class BlockWriter {
|
|||
|
||||
protected void writeBlockLine(boolean isIncrementalEncodingSeed, BlockLine line, BlockLine previousLine) throws IOException {
|
||||
assert fieldMetadata != null;
|
||||
BlockLine.Serializer.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
|
||||
BlockLine.Serializer.writeLineTermState(termStatesWriteBuffer, line, fieldMetadata.getFieldInfo(), termStateSerializer);
|
||||
blockLineWriter.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
|
||||
blockLineWriter.writeLineTermState(termStatesWriteBuffer, line, fieldMetadata.getFieldInfo(), termStateSerializer);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -194,88 +194,99 @@ public class FieldMetadata implements Accountable {
|
|||
+ (docsSeen == null ? 0 : docsSeen.ramBytesUsed());
|
||||
}
|
||||
|
||||
public static FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
|
||||
int fieldId = input.readVInt();
|
||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldId);
|
||||
if (fieldInfo == null) {
|
||||
throw new CorruptIndexException("Illegal field id= " + fieldId, input);
|
||||
}
|
||||
FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 0, false);
|
||||
/**
|
||||
* Reads/writes field metadata.
|
||||
*/
|
||||
public static class Serializer {
|
||||
|
||||
fieldMetadata.numTerms = input.readVInt();
|
||||
if (fieldMetadata.numTerms <= 0) {
|
||||
throw new CorruptIndexException("Illegal number of terms= " + fieldMetadata.numTerms + " for field= " + fieldId, input);
|
||||
}
|
||||
/**
|
||||
* Stateless singleton.
|
||||
*/
|
||||
public static final Serializer INSTANCE = new Serializer();
|
||||
|
||||
fieldMetadata.sumDocFreq = input.readVInt();
|
||||
fieldMetadata.sumTotalTermFreq = fieldMetadata.sumDocFreq;
|
||||
if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
|
||||
fieldMetadata.sumTotalTermFreq += input.readVInt();
|
||||
if (fieldMetadata.sumTotalTermFreq < fieldMetadata.sumDocFreq) {
|
||||
// #positions must be >= #postings.
|
||||
throw new CorruptIndexException("Illegal sumTotalTermFreq= " + fieldMetadata.sumTotalTermFreq
|
||||
+ " sumDocFreq= " + fieldMetadata.sumDocFreq + " for field= " + fieldId, input);
|
||||
public void write(DataOutput output, FieldMetadata fieldMetadata) throws IOException {
|
||||
assert fieldMetadata.dictionaryStartFP >= 0;
|
||||
assert fieldMetadata.firstBlockStartFP >= 0;
|
||||
assert fieldMetadata.lastBlockStartFP >= 0;
|
||||
assert fieldMetadata.numTerms > 0 : "There should be at least one term for field " + fieldMetadata.fieldInfo.name + ": " + fieldMetadata.numTerms;
|
||||
assert fieldMetadata.firstBlockStartFP <= fieldMetadata.lastBlockStartFP : "start: " + fieldMetadata.firstBlockStartFP + " end: " + fieldMetadata.lastBlockStartFP;
|
||||
assert fieldMetadata.lastTerm != null : "you must set the last term";
|
||||
|
||||
output.writeVInt(fieldMetadata.fieldInfo.number);
|
||||
|
||||
output.writeVInt(fieldMetadata.numTerms);
|
||||
output.writeVInt(fieldMetadata.sumDocFreq);
|
||||
|
||||
if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
|
||||
assert fieldMetadata.sumTotalTermFreq >= fieldMetadata.sumDocFreq : "sumTotalFQ: " + fieldMetadata.sumTotalTermFreq + " sumDocFQ: " + fieldMetadata.sumDocFreq;
|
||||
output.writeVInt(fieldMetadata.sumTotalTermFreq - fieldMetadata.sumDocFreq);
|
||||
}
|
||||
|
||||
output.writeVInt(fieldMetadata.getDocCount());
|
||||
|
||||
output.writeVLong(fieldMetadata.dictionaryStartFP);
|
||||
output.writeVLong(fieldMetadata.firstBlockStartFP);
|
||||
output.writeVLong(fieldMetadata.lastBlockStartFP);
|
||||
|
||||
if (fieldMetadata.lastTerm.length > 0) {
|
||||
output.writeVInt(fieldMetadata.lastTerm.length);
|
||||
output.writeBytes(fieldMetadata.lastTerm.bytes, fieldMetadata.lastTerm.offset, fieldMetadata.lastTerm.length);
|
||||
} else {
|
||||
output.writeVInt(0);
|
||||
}
|
||||
}
|
||||
|
||||
fieldMetadata.docCount = input.readVInt();
|
||||
if (fieldMetadata.docCount < 0 || fieldMetadata.docCount > maxNumDocs) {
|
||||
// #docs with field must be <= #docs.
|
||||
throw new CorruptIndexException("Illegal number of docs= " + fieldMetadata.docCount
|
||||
+ " maxNumDocs= " + maxNumDocs + " for field=" + fieldId, input);
|
||||
}
|
||||
if (fieldMetadata.sumDocFreq < fieldMetadata.docCount) {
|
||||
// #postings must be >= #docs with field.
|
||||
throw new CorruptIndexException("Illegal sumDocFreq= " + fieldMetadata.sumDocFreq
|
||||
+ " docCount= " + fieldMetadata.docCount + " for field= " + fieldId, input);
|
||||
}
|
||||
public FieldMetadata read(DataInput input, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
|
||||
int fieldId = input.readVInt();
|
||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldId);
|
||||
if (fieldInfo == null) {
|
||||
throw new CorruptIndexException("Illegal field id= " + fieldId, input);
|
||||
}
|
||||
FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 0, false);
|
||||
|
||||
fieldMetadata.dictionaryStartFP = input.readVLong();
|
||||
fieldMetadata.firstBlockStartFP = input.readVLong();
|
||||
fieldMetadata.lastBlockStartFP = input.readVLong();
|
||||
fieldMetadata.numTerms = input.readVInt();
|
||||
if (fieldMetadata.numTerms <= 0) {
|
||||
throw new CorruptIndexException("Illegal number of terms= " + fieldMetadata.numTerms + " for field= " + fieldId, input);
|
||||
}
|
||||
|
||||
int lastTermLength = input.readVInt();
|
||||
BytesRef lastTerm = new BytesRef(lastTermLength);
|
||||
if (lastTermLength > 0) {
|
||||
input.readBytes(lastTerm.bytes, 0, lastTermLength);
|
||||
lastTerm.length = lastTermLength;
|
||||
} else if (lastTermLength < 0) {
|
||||
throw new CorruptIndexException("Illegal last term length= " + lastTermLength + " for field= " + fieldId, input);
|
||||
}
|
||||
fieldMetadata.setLastTerm(lastTerm);
|
||||
fieldMetadata.sumDocFreq = input.readVInt();
|
||||
fieldMetadata.sumTotalTermFreq = fieldMetadata.sumDocFreq;
|
||||
if (fieldMetadata.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
|
||||
fieldMetadata.sumTotalTermFreq += input.readVInt();
|
||||
if (fieldMetadata.sumTotalTermFreq < fieldMetadata.sumDocFreq) {
|
||||
// #positions must be >= #postings.
|
||||
throw new CorruptIndexException("Illegal sumTotalTermFreq= " + fieldMetadata.sumTotalTermFreq
|
||||
+ " sumDocFreq= " + fieldMetadata.sumDocFreq + " for field= " + fieldId, input);
|
||||
}
|
||||
}
|
||||
|
||||
return fieldMetadata;
|
||||
}
|
||||
fieldMetadata.docCount = input.readVInt();
|
||||
if (fieldMetadata.docCount < 0 || fieldMetadata.docCount > maxNumDocs) {
|
||||
// #docs with field must be <= #docs.
|
||||
throw new CorruptIndexException("Illegal number of docs= " + fieldMetadata.docCount
|
||||
+ " maxNumDocs= " + maxNumDocs + " for field=" + fieldId, input);
|
||||
}
|
||||
if (fieldMetadata.sumDocFreq < fieldMetadata.docCount) {
|
||||
// #postings must be >= #docs with field.
|
||||
throw new CorruptIndexException("Illegal sumDocFreq= " + fieldMetadata.sumDocFreq
|
||||
+ " docCount= " + fieldMetadata.docCount + " for field= " + fieldId, input);
|
||||
}
|
||||
|
||||
public void write(DataOutput output) throws IOException {
|
||||
assert dictionaryStartFP >= 0;
|
||||
assert firstBlockStartFP >= 0;
|
||||
assert lastBlockStartFP >= 0;
|
||||
assert numTerms > 0 : "There should be at least one term for field " + fieldInfo.name + ": " + numTerms;
|
||||
assert firstBlockStartFP <= lastBlockStartFP : "start: " + firstBlockStartFP + " end: " + lastBlockStartFP;
|
||||
assert lastTerm != null : "you must set the last term";
|
||||
fieldMetadata.dictionaryStartFP = input.readVLong();
|
||||
fieldMetadata.firstBlockStartFP = input.readVLong();
|
||||
fieldMetadata.lastBlockStartFP = input.readVLong();
|
||||
|
||||
output.writeVInt(fieldInfo.number);
|
||||
int lastTermLength = input.readVInt();
|
||||
BytesRef lastTerm = new BytesRef(lastTermLength);
|
||||
if (lastTermLength > 0) {
|
||||
input.readBytes(lastTerm.bytes, 0, lastTermLength);
|
||||
lastTerm.length = lastTermLength;
|
||||
} else if (lastTermLength < 0) {
|
||||
throw new CorruptIndexException("Illegal last term length= " + lastTermLength + " for field= " + fieldId, input);
|
||||
}
|
||||
fieldMetadata.setLastTerm(lastTerm);
|
||||
|
||||
output.writeVInt(numTerms);
|
||||
output.writeVInt(sumDocFreq);
|
||||
|
||||
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0) {
|
||||
assert sumTotalTermFreq >= sumDocFreq : "sumTotalFQ: " + sumTotalTermFreq + " sumDocFQ: " + sumDocFreq;
|
||||
output.writeVInt(sumTotalTermFreq - sumDocFreq);
|
||||
}
|
||||
|
||||
output.writeVInt(getDocCount());
|
||||
|
||||
output.writeVLong(dictionaryStartFP);
|
||||
output.writeVLong(firstBlockStartFP);
|
||||
output.writeVLong(lastBlockStartFP);
|
||||
|
||||
if (lastTerm.length > 0) {
|
||||
output.writeVInt(lastTerm.length);
|
||||
output.writeBytes(lastTerm.bytes, lastTerm.offset, lastTerm.length);
|
||||
} else {
|
||||
output.writeVInt(0);
|
||||
return fieldMetadata;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -69,7 +69,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
|
|||
* It can be used for decompression or decryption.
|
||||
*/
|
||||
public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
|
||||
this(postingsReader, state, blockDecoder, NAME, VERSION_START, VERSION_CURRENT,
|
||||
this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT,
|
||||
TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
|
||||
}
|
||||
|
||||
|
@ -77,8 +77,10 @@ public class UniformSplitTermsReader extends FieldsProducer {
|
|||
* @param blockDecoder Optional block decoder, may be null if none.
|
||||
* It can be used for decompression or decryption.
|
||||
*/
|
||||
protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
|
||||
String codecName, int versionStart, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
|
||||
protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
|
||||
BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
|
||||
String codecName, int versionStart, int versionCurrent,
|
||||
String termsBlocksExtension, String dictionaryExtension) throws IOException {
|
||||
IndexInput dictionaryInput = null;
|
||||
IndexInput blockInput = null;
|
||||
boolean success = false;
|
||||
|
@ -100,7 +102,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
|
|||
CodecUtil.retrieveChecksum(blockInput);
|
||||
|
||||
seekFieldsMetadata(blockInput);
|
||||
Collection<FieldMetadata> fieldMetadataCollection = parseFieldsMetadata(blockInput, state.fieldInfos, state.segmentInfo.maxDoc());
|
||||
Collection<FieldMetadata> fieldMetadataCollection = parseFieldsMetadata(blockInput, state.fieldInfos, fieldMetadataReader, state.segmentInfo.maxDoc());
|
||||
|
||||
fieldToTermsMap = new HashMap<>();
|
||||
this.blockInput = blockInput;
|
||||
|
@ -133,19 +135,19 @@ public class UniformSplitTermsReader extends FieldsProducer {
|
|||
* @param indexInput {@link IndexInput} must be positioned to the fields metadata
|
||||
* details by calling {@link #seekFieldsMetadata(IndexInput)} before this call.
|
||||
*/
|
||||
protected static Collection<FieldMetadata> parseFieldsMetadata(IndexInput indexInput, FieldInfos fieldInfos, int maxNumDocs) throws IOException {
|
||||
protected static Collection<FieldMetadata> parseFieldsMetadata(IndexInput indexInput, FieldInfos fieldInfos,
|
||||
FieldMetadata.Serializer fieldMetadataReader, int maxNumDocs) throws IOException {
|
||||
int numFields = indexInput.readVInt();
|
||||
if (numFields < 0) {
|
||||
throw new CorruptIndexException("Illegal number of fields= " + numFields, indexInput);
|
||||
}
|
||||
Collection<FieldMetadata> fieldMetadataCollection = new ArrayList<>(numFields);
|
||||
for (int i = 0; i < numFields; i++) {
|
||||
fieldMetadataCollection.add(FieldMetadata.read(indexInput, fieldInfos, maxNumDocs));
|
||||
fieldMetadataCollection.add(fieldMetadataReader.read(indexInput, fieldInfos, maxNumDocs));
|
||||
}
|
||||
return fieldMetadataCollection;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
|
|
|
@ -128,6 +128,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
|
|||
protected final int deltaNumLines;
|
||||
|
||||
protected final BlockEncoder blockEncoder;
|
||||
protected final FieldMetadata.Serializer fieldMetadataWriter;
|
||||
protected final IndexOutput blockOutput;
|
||||
protected final IndexOutput dictionaryOutput;
|
||||
|
||||
|
@ -146,7 +147,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
|
|||
*/
|
||||
public UniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
|
||||
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException {
|
||||
this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder,
|
||||
this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, FieldMetadata.Serializer.INSTANCE,
|
||||
NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
|
||||
}
|
||||
|
||||
|
@ -164,7 +165,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
|
|||
* It can be used for compression or encryption.
|
||||
*/
|
||||
protected UniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
|
||||
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
|
||||
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, FieldMetadata.Serializer fieldMetadataWriter,
|
||||
String codecName, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
|
||||
validateSettings(targetNumBlockLines, deltaNumLines);
|
||||
IndexOutput blockOutput = null;
|
||||
|
@ -177,6 +178,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
|
|||
this.targetNumBlockLines = targetNumBlockLines;
|
||||
this.deltaNumLines = deltaNumLines;
|
||||
this.blockEncoder = blockEncoder;
|
||||
this.fieldMetadataWriter = fieldMetadataWriter;
|
||||
|
||||
String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, termsBlocksExtension);
|
||||
blockOutput = state.directory.createOutput(termsName, state.context);
|
||||
|
@ -278,7 +280,7 @@ public class UniformSplitTermsWriter extends FieldsConsumer {
|
|||
|
||||
if (fieldMetadata.getNumTerms() > 0) {
|
||||
fieldMetadata.setLastTerm(lastTerm);
|
||||
fieldMetadata.write(fieldsOutput);
|
||||
fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
|
||||
writeDictionary(dictionaryBuilder);
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -75,7 +75,7 @@ public class STBlockLine extends BlockLine {
|
|||
/**
|
||||
* Writes all the {@link BlockTermState} of the provided {@link STBlockLine} to the given output.
|
||||
*/
|
||||
public static void writeLineTermStates(DataOutput termStatesOutput, STBlockLine line,
|
||||
public void writeLineTermStates(DataOutput termStatesOutput, STBlockLine line,
|
||||
DeltaBaseTermStateSerializer encoder) throws IOException {
|
||||
|
||||
FieldMetadataTermState fieldMetadataTermState;
|
||||
|
@ -111,7 +111,7 @@ public class STBlockLine extends BlockLine {
|
|||
* @return The {@link BlockTermState} corresponding to the provided field id; or null if the field
|
||||
* does not occur in the line.
|
||||
*/
|
||||
public static BlockTermState readTermStateForField(int fieldId, DataInput termStatesInput,
|
||||
public BlockTermState readTermStateForField(int fieldId, DataInput termStatesInput,
|
||||
DeltaBaseTermStateSerializer termStateSerializer,
|
||||
BlockHeader blockHeader, FieldInfos fieldInfos,
|
||||
BlockTermState reuse) throws IOException {
|
||||
|
@ -161,7 +161,7 @@ public class STBlockLine extends BlockLine {
|
|||
* @param fieldTermStatesMap Map filled with the term states for each field. It is cleared first.
|
||||
* @see #readTermStateForField
|
||||
*/
|
||||
public static void readFieldTermStatesMap(DataInput termStatesInput,
|
||||
public void readFieldTermStatesMap(DataInput termStatesInput,
|
||||
DeltaBaseTermStateSerializer termStateSerializer,
|
||||
BlockHeader blockHeader,
|
||||
FieldInfos fieldInfos,
|
||||
|
@ -183,7 +183,7 @@ public class STBlockLine extends BlockLine {
|
|||
/**
|
||||
* Reads all the field ids in the current block line of the provided input.
|
||||
*/
|
||||
public static int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException {
|
||||
public int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException {
|
||||
int[] fieldIds = new int[numFields];
|
||||
for (int i = 0; i < numFields; i++) {
|
||||
fieldIds[i] = termStatesInput.readVInt();
|
||||
|
|
|
@ -117,6 +117,11 @@ public class STBlockReader extends BlockReader {
|
|||
return blockStartFP > fieldMetadata.getLastBlockStartFP() || super.isBeyondLastTerm(searchedTerm, blockStartFP);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected STBlockLine.Serializer createBlockLineSerializer() {
|
||||
return new STBlockLine.Serializer();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the {@link BlockTermState} on the current line for this reader's field.
|
||||
*
|
||||
|
@ -125,7 +130,7 @@ public class STBlockReader extends BlockReader {
|
|||
@Override
|
||||
protected BlockTermState readTermState() throws IOException {
|
||||
termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
|
||||
return termState = STBlockLine.Serializer.readTermStateForField(
|
||||
return termState = ((STBlockLine.Serializer) blockLineReader).readTermStateForField(
|
||||
fieldMetadata.getFieldInfo().number,
|
||||
termStatesReadBuffer,
|
||||
termStateSerializer,
|
||||
|
|
|
@ -84,10 +84,15 @@ public class STBlockWriter extends BlockWriter {
|
|||
super.finishLastBlock(dictionaryBuilder);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BlockLine.Serializer createBlockLineSerializer() {
|
||||
return new STBlockLine.Serializer();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeBlockLine(boolean isIncrementalEncodingSeed, BlockLine line, BlockLine previousLine) throws IOException {
|
||||
STBlockLine.Serializer.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
|
||||
STBlockLine.Serializer.writeLineTermStates(termStatesWriteBuffer, (STBlockLine) line, termStateSerializer);
|
||||
blockLineWriter.writeLine(blockLinesWriteBuffer, line, previousLine, Math.toIntExact(termStatesWriteBuffer.size()), isIncrementalEncodingSeed);
|
||||
((STBlockLine.Serializer) blockLineWriter).writeLineTermStates(termStatesWriteBuffer, (STBlockLine) line, termStateSerializer);
|
||||
((STBlockLine) line).collectFields(fieldsInBlock);
|
||||
}
|
||||
|
||||
|
|
|
@ -91,6 +91,11 @@ public class STIntersectBlockReader extends IntersectBlockReader {
|
|||
return super.nextBlockMatchingPrefix() && blockHeader != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected STBlockLine.Serializer createBlockLineSerializer() {
|
||||
return new STBlockLine.Serializer();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the {@link BlockTermState} on the current line for the specific field
|
||||
* corresponding this this reader.
|
||||
|
@ -100,7 +105,7 @@ public class STIntersectBlockReader extends IntersectBlockReader {
|
|||
@Override
|
||||
protected BlockTermState readTermState() throws IOException {
|
||||
termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
|
||||
return STBlockLine.Serializer.readTermStateForField(
|
||||
return ((STBlockLine.Serializer) blockLineReader).readTermStateForField(
|
||||
fieldMetadata.getFieldInfo().number,
|
||||
termStatesReadBuffer,
|
||||
termStateSerializer,
|
||||
|
|
|
@ -98,7 +98,7 @@ public class STMergingBlockReader extends STBlockReader {
|
|||
public void readFieldTermStatesMap(Map<String, BlockTermState> fieldTermStatesMap) throws IOException {
|
||||
if (term() != null) {
|
||||
termStatesReadBuffer.setPosition(blockFirstLineStart + blockHeader.getTermStatesBaseOffset() + blockLine.getTermStateRelativeOffset());
|
||||
STBlockLine.Serializer.readFieldTermStatesMap(
|
||||
((STBlockLine.Serializer) blockLineReader).readFieldTermStatesMap(
|
||||
termStatesReadBuffer,
|
||||
termStateSerializer,
|
||||
blockHeader,
|
||||
|
|
|
@ -47,13 +47,15 @@ import static org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPo
|
|||
public class STUniformSplitTermsReader extends UniformSplitTermsReader {
|
||||
|
||||
public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
|
||||
super(postingsReader, state, blockDecoder, NAME, VERSION_START,
|
||||
VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
|
||||
this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE,
|
||||
NAME, VERSION_START, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
|
||||
}
|
||||
|
||||
protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
|
||||
String codecName, int versionStart, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
|
||||
super(postingsReader, state, blockDecoder, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
|
||||
protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
|
||||
BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
|
||||
String codecName, int versionStart, int versionCurrent,
|
||||
String termsBlocksExtension, String dictionaryExtension) throws IOException {
|
||||
super(postingsReader, state, blockDecoder, fieldMetadataReader, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -88,13 +88,14 @@ public class STUniformSplitTermsWriter extends UniformSplitTermsWriter {
|
|||
|
||||
public STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
|
||||
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws IOException {
|
||||
this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
|
||||
this(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, FieldMetadata.Serializer.INSTANCE,
|
||||
NAME, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
|
||||
}
|
||||
|
||||
protected STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state,
|
||||
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
|
||||
int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, FieldMetadata.Serializer fieldMetadataWriter,
|
||||
String codecName, int versionCurrent, String termsBlocksExtension, String dictionaryExtension) throws IOException {
|
||||
super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, codecName, versionCurrent, termsBlocksExtension, dictionaryExtension);
|
||||
super(postingsWriter, state, targetNumBlockLines, deltaNumLines, blockEncoder, fieldMetadataWriter, codecName, versionCurrent, termsBlocksExtension, dictionaryExtension);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -200,7 +201,7 @@ public class STUniformSplitTermsWriter extends UniformSplitTermsWriter {
|
|||
int fieldsNumber = 0;
|
||||
for (FieldMetadata fieldMetadata : fieldMetadataList) {
|
||||
if (fieldMetadata.getNumTerms() > 0) {
|
||||
fieldMetadata.write(fieldsOutput);
|
||||
fieldMetadataWriter.write(fieldsOutput, fieldMetadata);
|
||||
fieldsNumber++;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue