mirror of https://github.com/apache/lucene.git
All the metadata can be directly encoded in the `DataOutput`.
This commit is contained in:
parent 5f2d7c4855
commit fb3ca8d000
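For context, a minimal sketch (illustrative only, not part of this patch) of what "directly encoded in the `DataOutput`" means in practice: per-term metadata such as file-pointer deltas is streamed with `writeVLong` and read back symmetrically with `readVLong`, instead of being staged in a `long[]` whose length `setField` used to announce. The class and method names below are hypothetical.

```java
// Illustrative sketch only (assumes lucene-core on the classpath); these are
// not the actual patched Lucene classes.
import java.io.IOException;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

final class TermMetadataSketch {

  /** Writer side: stream the deltas directly, no long[] staging buffer. */
  static void encode(DataOutput out, long docFPDelta, long posFPDelta,
                     boolean hasPositions) throws IOException {
    out.writeVLong(docFPDelta);      // doc file-pointer delta
    if (hasPositions) {
      out.writeVLong(posFPDelta);    // pos file-pointer delta
    }
  }

  /** Reader side: mirror the writer exactly, consuming the same vLongs. */
  static void decode(DataInput in, long[] fp /* {docFP, posFP} accumulators */,
                     boolean hasPositions) throws IOException {
    fp[0] += in.readVLong();
    if (hasPositions) {
      fp[1] += in.readVLong();
    }
  }
}
```

The reader works because it consumes exactly the values the writer produced, in the same order, so no external count of "how many longs" is needed.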
@@ -91,6 +91,12 @@ API Changes
  yield Passages sized a little different due to the fact that the sizing pivot is now the center of the first match and
  not its left edge.

+* LUCENE-9116: PostingsWriterBase and PostingsReaderBase no longer support
+  setting a field's metadata via a `long[]`. (Adrien Grand)
+
+* LUCENE-9116: The FSTOrd postings format has been removed.
+  (Adrien Grand)
+
* LUCENE-8369: Remove obsolete spatial module. (Nick Knize, David Smiley)

New Features
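In code, the LUCENE-9116 entry amounts to the signature changes below (a simplified sketch of the affected surface; the real PostingsWriterBase/PostingsReaderBase classes carry many more members, and the interface names here are only for illustration):

```java
// Simplified sketch of the changed API surface, not the full Lucene classes.
import java.io.IOException;

import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

interface TermMetadataWriter {
  // Before: `int setField(FieldInfo)` returned how many metadata longs the
  // format needed; now nothing is returned because no long[] is exchanged.
  void setField(FieldInfo fieldInfo);

  // Before: encodeTerm(long[] longs, DataOutput out, ...); the writer now
  // streams its metadata directly into `out`.
  void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState state,
                  boolean absolute) throws IOException;
}

interface TermMetadataReader {
  // Before: decodeTerm(long[] longs, DataInput in, ...).
  void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state,
                  boolean absolute) throws IOException;
}
```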
@@ -154,7 +154,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
  }

  @Override
-  public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
+  public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
    throws IOException {
    final IntBlockTermState termState = (IntBlockTermState) _termState;
    final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
@@ -167,11 +167,11 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
      termState.payStartFP = 0;
    }

-    termState.docStartFP += longs[0];
+    termState.docStartFP += in.readVLong();
    if (fieldHasPositions) {
-      termState.posStartFP += longs[1];
+      termState.posStartFP += in.readVLong();
      if (fieldHasOffsets || fieldHasPayloads) {
-        termState.payStartFP += longs[2];
+        termState.payStartFP += in.readVLong();
      }
    }
    if (termState.docFreq == 1) {
@@ -187,20 +187,11 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
  }

  @Override
-  public int setField(FieldInfo fieldInfo) {
+  public void setField(FieldInfo fieldInfo) {
    super.setField(fieldInfo);
    skipWriter.setField(writePositions, writeOffsets, writePayloads);
    lastState = emptyState;
    fieldHasNorms = fieldInfo.hasNorms();
-    if (writePositions) {
-      if (writePayloads || writeOffsets) {
-        return 3; // doc + pos + pay FP
-      } else {
-        return 2; // doc + pos FP
-      }
-    } else {
-      return 1; // doc FP
-    }
  }

  @Override
@@ -463,16 +454,16 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
  }

  @Override
-  public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
+  public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
    IntBlockTermState state = (IntBlockTermState)_state;
    if (absolute) {
      lastState = emptyState;
    }
-    longs[0] = state.docStartFP - lastState.docStartFP;
+    out.writeVLong(state.docStartFP - lastState.docStartFP);
    if (writePositions) {
-      longs[1] = state.posStartFP - lastState.posStartFP;
+      out.writeVLong(state.posStartFP - lastState.posStartFP);
      if (writePayloads || writeOffsets) {
-        longs[2] = state.payStartFP - lastState.payStartFP;
+        out.writeVLong(state.payStartFP - lastState.payStartFP);
      }
    }
    if (state.singletonDocID != -1) {
@@ -145,7 +145,6 @@ public class BlockTermsReader extends FieldsProducer {
        // when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
        final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
        final int docCount = in.readVInt();
-        final int longsSize = in.readVInt();
        if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
          throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in);
        }
@@ -155,7 +154,7 @@ public class BlockTermsReader extends FieldsProducer {
        if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
          throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
        }
-        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
+        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
        if (previous != null) {
          throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in);
        }
@@ -223,9 +222,8 @@ public class BlockTermsReader extends FieldsProducer {
    final long sumTotalTermFreq;
    final long sumDocFreq;
    final int docCount;
-    final int longsSize;

-    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
+    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
      assert numTerms > 0;
      this.fieldInfo = fieldInfo;
      this.numTerms = numTerms;
@@ -233,7 +231,6 @@ public class BlockTermsReader extends FieldsProducer {
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
-      this.longsSize = longsSize;
    }

    @Override
@@ -326,7 +323,6 @@ public class BlockTermsReader extends FieldsProducer {
    private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
    private int metaDataUpto;

-    private long[] longs;
    private byte[] bytes;
    private ByteArrayDataInput bytesReader;

@@ -343,7 +339,6 @@ public class BlockTermsReader extends FieldsProducer {
      termSuffixes = new byte[128];
      docFreqBytes = new byte[64];
      //System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
-      longs = new long[longsSize];
    }

    // TODO: we may want an alternate mode here which is
@@ -826,10 +821,7 @@ public class BlockTermsReader extends FieldsProducer {
          //System.out.println("  totTF=" + state.totalTermFreq);
        }
        // metadata
-        for (int i = 0; i < longs.length; i++) {
-          longs[i] = bytesReader.readVLong();
-        }
-        postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
+        postingsReader.decodeTerm(bytesReader, fieldInfo, state, absolute);
        metaDataUpto++;
        absolute = false;
      }
@@ -81,9 +81,8 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
    public final long sumTotalTermFreq;
    public final long sumDocFreq;
    public final int docCount;
-    public final int longsSize;

-    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
+    public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
      assert numTerms > 0;
      this.fieldInfo = fieldInfo;
      this.termsStartPointer = termsStartPointer;
@@ -91,7 +90,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
-      this.longsSize = longsSize;
    }
  }

@@ -176,7 +174,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
      }
      out.writeVLong(field.sumDocFreq);
      out.writeVInt(field.docCount);
-      out.writeVInt(field.longsSize);
    }
    writeTrailer(dirStart);
    CodecUtil.writeFooter(out);
@@ -206,7 +203,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
    long sumTotalTermFreq;
    long sumDocFreq;
    int docCount;
-    int longsSize;

    private TermEntry[] pendingTerms;

@@ -226,7 +222,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
      }
      termsStartPointer = out.getFilePointer();
      this.postingsWriter = postingsWriter;
-      this.longsSize = postingsWriter.setField(fieldInfo);
+      postingsWriter.setField(fieldInfo);
    }

    private final BytesRefBuilder lastPrevTerm = new BytesRefBuilder();
@@ -285,8 +281,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
            termsStartPointer,
            fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 ? sumTotalTermFreq : -1,
            sumDocFreq,
-            docsSeen.cardinality(),
-            longsSize));
+            docsSeen.cardinality()));
      }
    }

@@ -307,7 +302,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
    }

    private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance();
-    private final ByteBuffersDataOutput bufferWriter = ByteBuffersDataOutput.newResettableInstance();

    private void flushBlock() throws IOException {
      //System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
@@ -353,16 +347,10 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
      bytesWriter.reset();

      // 4th pass: write the metadata
-      long[] longs = new long[longsSize];
      boolean absolute = true;
      for(int termCount=0;termCount<pendingCount;termCount++) {
        final BlockTermState state = pendingTerms[termCount].state;
-        postingsWriter.encodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
-        for (int i = 0; i < longsSize; i++) {
-          bytesWriter.writeVLong(longs[i]);
-        }
-        bufferWriter.copyTo(bytesWriter);
-        bufferWriter.reset();
+        postingsWriter.encodeTerm(bytesWriter, fieldInfo, state, absolute);
        absolute = false;
      }
      out.writeVInt(Math.toIntExact(bytesWriter.size()));
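The BlockTermsWriter hunks above show the net effect on a term dictionary: each term appends its metadata straight to one shared buffer, and the matching reader replays that buffer in the same order. Below is a small self-contained round trip of that pattern (hypothetical example assuming lucene-core on the classpath; the file-pointer values are made up):

```java
// Round-trip sketch: per-term metadata is appended to one growing buffer by
// the writer and consumed in the same order by the reader, so no per-term
// long[] or length bookkeeping is needed.
import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;

public class MetadataRoundTrip {
  public static void main(String[] args) throws IOException {
    long[] docStartFPs = {0, 17, 60, 61};          // made-up file pointers
    ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();

    long last = 0;
    for (long fp : docStartFPs) {                  // writer: one delta per term
      metaWriter.writeVLong(fp - last);
      last = fp;
    }

    ByteArrayDataInput metaReader = new ByteArrayDataInput(metaWriter.toArrayCopy());
    long fp = 0;
    for (int i = 0; i < docStartFPs.length; i++) { // reader: replay the deltas
      fp += metaReader.readVLong();
      assert fp == docStartFPs[i];
    }
  }
}
```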
@@ -130,7 +130,6 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
        // when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
        final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
        final int docCount = in.readVInt();
-        final int longsSize = in.readVInt();
        // System.out.println("  longsSize=" + longsSize);

        BytesRef minTerm = readBytesRef(in);
@@ -147,7 +146,7 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
        final long indexStartFP = indexIn.readVLong();
        OrdsFieldReader previous = fields.put(fieldInfo.name,
            new OrdsFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
-                indexStartFP, longsSize, indexIn, minTerm, maxTerm));
+                indexStartFP, indexIn, minTerm, maxTerm));
        if (previous != null) {
          throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
        }
@@ -143,12 +143,11 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
    public final long sumTotalTermFreq;
    public final long sumDocFreq;
    public final int docCount;
-    private final int longsSize;
    public final BytesRef minTerm;
    public final BytesRef maxTerm;

    public FieldMetaData(FieldInfo fieldInfo, Output rootCode, long numTerms, long indexStartFP,
-                         long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize,
+                         long sumTotalTermFreq, long sumDocFreq, int docCount,
                         BytesRef minTerm, BytesRef maxTerm) {
      assert numTerms > 0;
      this.fieldInfo = fieldInfo;
@@ -159,7 +158,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
      this.sumTotalTermFreq = sumTotalTermFreq;
      this.sumDocFreq = sumDocFreq;
      this.docCount = docCount;
-      this.longsSize = longsSize;
      this.minTerm = minTerm;
      this.maxTerm = maxTerm;
    }
@@ -424,7 +422,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {

  class TermsWriter {
    private final FieldInfo fieldInfo;
-    private final int longsSize;
    private long numTerms;
    final FixedBitSet docsSeen;
    long sumTotalTermFreq;
@@ -439,8 +436,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
    private final BytesRefBuilder lastTerm = new BytesRefBuilder();
    private int[] prefixStarts = new int[8];

-    private final long[] longs;
-
    // Pending stack of terms and blocks. As terms arrive (in sorted order)
    // we append to this stack, and once the top of the stack has enough
    // terms starting with a common prefix, we write a new block with
@@ -633,13 +628,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
        }

        // Write term meta data
-        postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
-        for (int pos = 0; pos < longsSize; pos++) {
-          assert longs[pos] >= 0;
-          metaWriter.writeVLong(longs[pos]);
-        }
-        bytesWriter.copyTo(metaWriter);
-        bytesWriter.reset();
+        postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
        absolute = false;
      }
      totalTermCount = end-start;
@@ -684,13 +673,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
          // separate anymore:

          // Write term meta data
-          postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
-          for (int pos = 0; pos < longsSize; pos++) {
-            assert longs[pos] >= 0;
-            metaWriter.writeVLong(longs[pos]);
-          }
-          bytesWriter.copyTo(metaWriter);
-          bytesWriter.reset();
+          postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
          absolute = false;

          totalTermCount++;
@@ -763,8 +746,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
    TermsWriter(FieldInfo fieldInfo) {
      this.fieldInfo = fieldInfo;
      docsSeen = new FixedBitSet(maxDoc);
-      this.longsSize = postingsWriter.setField(fieldInfo);
-      this.longs = new long[longsSize];
+      postingsWriter.setField(fieldInfo);
    }

    /** Writes one term's worth of postings. */
@@ -874,7 +856,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
          sumTotalTermFreq,
          sumDocFreq,
          docsSeen.cardinality(),
-          longsSize,
          minTerm, maxTerm));
      } else {
        assert docsSeen.cardinality() == 0;
@@ -884,7 +865,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
    private final ByteBuffersDataOutput suffixWriter = ByteBuffersDataOutput.newResettableInstance();
    private final ByteBuffersDataOutput statsWriter = ByteBuffersDataOutput.newResettableInstance();
    private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
-    private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance();
  }

  private boolean closed;
@@ -916,7 +896,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
      }
      out.writeVLong(field.sumDocFreq);
      out.writeVInt(field.docCount);
-      out.writeVInt(field.longsSize);
      indexOut.writeVLong(field.indexStartFP);
      writeBytesRef(out, field.minTerm);
      writeBytesRef(out, field.maxTerm);
@@ -46,7 +46,6 @@ final class OrdsFieldReader extends Terms implements Accountable {
  final Output rootCode;
  final BytesRef minTerm;
  final BytesRef maxTerm;
-  final int longsSize;
  final OrdsBlockTreeTermsReader parent;

  final FST<Output> index;
@@ -54,7 +53,7 @@ final class OrdsFieldReader extends Terms implements Accountable {

  OrdsFieldReader(OrdsBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms,
                  Output rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
-                  long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
+                  long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
    assert numTerms > 0;
    this.fieldInfo = fieldInfo;
    //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
@@ -65,7 +64,6 @@ final class OrdsFieldReader extends Terms implements Accountable {
    this.docCount = docCount;
    this.indexStartFP = indexStartFP;
    this.rootCode = rootCode;
-    this.longsSize = longsSize;
    this.minTerm = minTerm;
    this.maxTerm = maxTerm;
    // if (DEBUG) {
@@ -84,9 +84,7 @@ final class OrdsIntersectTermsEnumFrame {

  final BlockTermState termState;

-  // metadata buffer, holding monotonic values
-  public long[] longs;
-  // metadata buffer, holding general values
+  // metadata
  public byte[] bytes;
  ByteArrayDataInput bytesReader;

@@ -103,7 +101,6 @@ final class OrdsIntersectTermsEnumFrame {
    this.ord = ord;
    this.termState = ite.fr.parent.postingsReader.newTermState();
    this.termState.totalTermFreq = -1;
-    this.longs = new long[ite.fr.longsSize];
  }

  void loadNextFloorBlock() throws IOException {
@@ -298,11 +295,8 @@ final class OrdsIntersectTermsEnumFrame {
        termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
        //if (DEBUG) System.out.println("        totTF=" + state.totalTermFreq);
      }
-      // metadata
-      for (int i = 0; i < ite.fr.longsSize; i++) {
-        longs[i] = bytesReader.readVLong();
-      }
-      ite.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ite.fr.fieldInfo, termState, absolute);
+      // metadata
+      ite.fr.parent.postingsReader.decodeTerm(bytesReader, ite.fr.fieldInfo, termState, absolute);

      metaDataUpto++;
      absolute = false;
@@ -97,9 +97,7 @@ final class OrdsSegmentTermsEnumFrame {

  final BlockTermState state;

-  // metadata buffer, holding monotonic values
-  public long[] longs;
-  // metadata buffer, holding general values
+  // metadata
  public byte[] bytes;
  ByteArrayDataInput bytesReader;

@@ -110,7 +108,6 @@ final class OrdsSegmentTermsEnumFrame {
    this.ord = ord;
    this.state = ste.fr.parent.postingsReader.newTermState();
    this.state.totalTermFreq = -1;
-    this.longs = new long[ste.fr.longsSize];
  }

  public void setFloorData(ByteArrayDataInput in, BytesRef source) {
@@ -507,11 +504,8 @@ final class OrdsSegmentTermsEnumFrame {
      }
      //if (DEBUG) System.out.println("      longsSize=" + ste.fr.longsSize);

-      // metadata
-      for (int i = 0; i < ste.fr.longsSize; i++) {
-        longs[i] = bytesReader.readVLong();
-      }
-      ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute);
+      // metadata
+      ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute);

      metaDataUpto++;
      absolute = false;
@@ -1,78 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.memory;
|
||||
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.codecs.lucene84.Lucene84PostingsReader;
|
||||
import org.apache.lucene.codecs.lucene84.Lucene84PostingsWriter;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* FSTOrd term dict + Lucene50PBF
|
||||
*/
|
||||
|
||||
public final class FSTOrdPostingsFormat extends PostingsFormat {
|
||||
public FSTOrdPostingsFormat() {
|
||||
super("FSTOrd50");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getName();
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
PostingsWriterBase postingsWriter = new Lucene84PostingsWriter(state);
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
FieldsConsumer ret = new FSTOrdTermsWriter(state, postingsWriter);
|
||||
success = true;
|
||||
return ret;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(postingsWriter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||
PostingsReaderBase postingsReader = new Lucene84PostingsReader(state);
|
||||
boolean success = false;
|
||||
try {
|
||||
FieldsProducer ret = new FSTOrdTermsReader(state, postingsReader);
|
||||
success = true;
|
||||
return ret;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(postingsReader);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -1,884 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.memory;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTermState;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.Accountables;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.Outputs;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
/**
|
||||
* FST-based terms dictionary reader.
|
||||
*
|
||||
* The FST index maps each term and its ord, and during seek
|
||||
* the ord is used to fetch metadata from a single block.
|
||||
* The term dictionary is fully memory resident.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class FSTOrdTermsReader extends FieldsProducer {
|
||||
static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL;
|
||||
final TreeMap<String, TermsReader> fields = new TreeMap<>();
|
||||
final PostingsReaderBase postingsReader;
|
||||
//static final boolean TEST = false;
|
||||
|
||||
public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
|
||||
final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
|
||||
final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);
|
||||
|
||||
this.postingsReader = postingsReader;
|
||||
ChecksumIndexInput indexIn = null;
|
||||
IndexInput blockIn = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
indexIn = state.directory.openChecksumInput(termsIndexFileName, state.context);
|
||||
blockIn = state.directory.openInput(termsBlockFileName, state.context);
|
||||
int version = CodecUtil.checkIndexHeader(indexIn, FSTOrdTermsWriter.TERMS_INDEX_CODEC_NAME,
|
||||
FSTOrdTermsWriter.VERSION_START,
|
||||
FSTOrdTermsWriter.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
int version2 = CodecUtil.checkIndexHeader(blockIn, FSTOrdTermsWriter.TERMS_CODEC_NAME,
|
||||
FSTOrdTermsWriter.VERSION_START,
|
||||
FSTOrdTermsWriter.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
|
||||
if (version != version2) {
|
||||
throw new CorruptIndexException("Format versions mismatch: index=" + version + ", terms=" + version2, blockIn);
|
||||
}
|
||||
|
||||
CodecUtil.checksumEntireFile(blockIn);
|
||||
|
||||
this.postingsReader.init(blockIn, state);
|
||||
seekDir(blockIn);
|
||||
|
||||
final FieldInfos fieldInfos = state.fieldInfos;
|
||||
final int numFields = blockIn.readVInt();
|
||||
for (int i = 0; i < numFields; i++) {
|
||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(blockIn.readVInt());
|
||||
boolean hasFreq = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
|
||||
long numTerms = blockIn.readVLong();
|
||||
long sumTotalTermFreq = blockIn.readVLong();
|
||||
// if freqs are omitted, sumDocFreq=sumTotalTermFreq and we only write one value
|
||||
long sumDocFreq = hasFreq ? blockIn.readVLong() : sumTotalTermFreq;
|
||||
int docCount = blockIn.readVInt();
|
||||
int longsSize = blockIn.readVInt();
|
||||
FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
|
||||
|
||||
TermsReader current = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
|
||||
TermsReader previous = fields.put(fieldInfo.name, current);
|
||||
checkFieldSummary(state.segmentInfo, indexIn, blockIn, current, previous);
|
||||
}
|
||||
CodecUtil.checkFooter(indexIn);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(indexIn, blockIn);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(indexIn, blockIn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void seekDir(IndexInput in) throws IOException {
|
||||
in.seek(in.length() - CodecUtil.footerLength() - 8);
|
||||
in.seek(in.readLong());
|
||||
}
|
||||
private void checkFieldSummary(SegmentInfo info, IndexInput indexIn, IndexInput blockIn, TermsReader field, TermsReader previous) throws IOException {
|
||||
// #docs with field must be <= #docs
|
||||
if (field.docCount < 0 || field.docCount > info.maxDoc()) {
|
||||
throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.maxDoc() + " (blockIn=" + blockIn + ")", indexIn);
|
||||
}
|
||||
// #postings must be >= #docs with field
|
||||
if (field.sumDocFreq < field.docCount) {
|
||||
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (blockIn=" + blockIn + ")", indexIn);
|
||||
}
|
||||
// #positions must be >= #postings
|
||||
if (field.sumTotalTermFreq < field.sumDocFreq) {
|
||||
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (blockIn=" + blockIn + ")", indexIn);
|
||||
}
|
||||
if (previous != null) {
|
||||
throw new CorruptIndexException("duplicate fields: " + field.fieldInfo.name + " (blockIn=" + blockIn + ")", indexIn);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return Collections.unmodifiableSet(fields.keySet()).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
assert field != null;
|
||||
return fields.get(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return fields.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
IOUtils.close(postingsReader);
|
||||
} finally {
|
||||
fields.clear();
|
||||
}
|
||||
}
|
||||
|
||||
final class TermsReader extends Terms implements Accountable {
|
||||
final FieldInfo fieldInfo;
|
||||
final long numTerms;
|
||||
final long sumTotalTermFreq;
|
||||
final long sumDocFreq;
|
||||
final int docCount;
|
||||
final int longsSize;
|
||||
final FST<Long> index;
|
||||
|
||||
final int numSkipInfo;
|
||||
final long[] skipInfo;
|
||||
final byte[] statsBlock;
|
||||
final byte[] metaLongsBlock;
|
||||
final byte[] metaBytesBlock;
|
||||
|
||||
TermsReader(FieldInfo fieldInfo, IndexInput blockIn, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException {
|
||||
this.fieldInfo = fieldInfo;
|
||||
this.numTerms = numTerms;
|
||||
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||
this.sumDocFreq = sumDocFreq;
|
||||
this.docCount = docCount;
|
||||
this.longsSize = longsSize;
|
||||
this.index = index;
|
||||
|
||||
assert (numTerms & (~0xffffffffL)) == 0;
|
||||
final int numBlocks = (int)(numTerms + INTERVAL - 1) / INTERVAL;
|
||||
this.numSkipInfo = longsSize + 3;
|
||||
this.skipInfo = new long[numBlocks * numSkipInfo];
|
||||
this.statsBlock = new byte[(int)blockIn.readVLong()];
|
||||
this.metaLongsBlock = new byte[(int)blockIn.readVLong()];
|
||||
this.metaBytesBlock = new byte[(int)blockIn.readVLong()];
|
||||
|
||||
int last = 0, next = 0;
|
||||
for (int i = 1; i < numBlocks; i++) {
|
||||
next = numSkipInfo * i;
|
||||
for (int j = 0; j < numSkipInfo; j++) {
|
||||
skipInfo[next + j] = skipInfo[last + j] + blockIn.readVLong();
|
||||
}
|
||||
last = next;
|
||||
}
|
||||
blockIn.readBytes(statsBlock, 0, statsBlock.length);
|
||||
blockIn.readBytes(metaLongsBlock, 0, metaLongsBlock.length);
|
||||
blockIn.readBytes(metaBytesBlock, 0, metaBytesBlock.length);
|
||||
}
|
||||
|
||||
public boolean hasFreqs() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasOffsets() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPositions() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return fieldInfo.hasPayloads();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size() {
|
||||
return numTerms;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSumTotalTermFreq() {
|
||||
return sumTotalTermFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSumDocFreq() throws IOException {
|
||||
return sumDocFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocCount() throws IOException {
|
||||
return docCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum iterator() throws IOException {
|
||||
return new SegmentTermsEnum();
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
|
||||
if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
|
||||
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
|
||||
}
|
||||
return new IntersectTermsEnum(compiled, startTerm);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long ramBytesUsed = 0;
|
||||
if (index != null) {
|
||||
ramBytesUsed += index.ramBytesUsed();
|
||||
ramBytesUsed += RamUsageEstimator.sizeOf(metaBytesBlock);
|
||||
ramBytesUsed += RamUsageEstimator.sizeOf(metaLongsBlock);
|
||||
ramBytesUsed += RamUsageEstimator.sizeOf(skipInfo);
|
||||
ramBytesUsed += RamUsageEstimator.sizeOf(statsBlock);
|
||||
}
|
||||
return ramBytesUsed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<Accountable> getChildResources() {
|
||||
if (index == null) {
|
||||
return Collections.emptyList();
|
||||
} else {
|
||||
return Collections.singletonList(Accountables.namedAccountable("terms", index));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "FSTOrdTerms(terms=" + numTerms + ",postings=" + sumDocFreq + ",positions=" + sumTotalTermFreq + ",docs=" + docCount + ")";
|
||||
}
|
||||
|
||||
// Only wraps common operations for PBF interact
|
||||
abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {
|
||||
|
||||
/* Current term's ord, starts from 0 */
|
||||
long ord;
|
||||
|
||||
/* Current term stats + decoded metadata (customized by PBF) */
|
||||
final BlockTermState state;
|
||||
|
||||
/* Datainput to load stats & metadata */
|
||||
final ByteArrayDataInput statsReader = new ByteArrayDataInput();
|
||||
final ByteArrayDataInput metaLongsReader = new ByteArrayDataInput();
|
||||
final ByteArrayDataInput metaBytesReader = new ByteArrayDataInput();
|
||||
|
||||
/* To which block is buffered */
|
||||
int statsBlockOrd;
|
||||
int metaBlockOrd;
|
||||
|
||||
/* Current buffered metadata (long[] & byte[]) */
|
||||
long[][] longs;
|
||||
int[] bytesStart;
|
||||
int[] bytesLength;
|
||||
|
||||
/* Current buffered stats (df & ttf) */
|
||||
int[] docFreq;
|
||||
long[] totalTermFreq;
|
||||
|
||||
BaseTermsEnum() throws IOException {
|
||||
this.state = postingsReader.newTermState();
|
||||
this.statsReader.reset(statsBlock);
|
||||
this.metaLongsReader.reset(metaLongsBlock);
|
||||
this.metaBytesReader.reset(metaBytesBlock);
|
||||
|
||||
this.longs = new long[INTERVAL][longsSize];
|
||||
this.bytesStart = new int[INTERVAL];
|
||||
this.bytesLength = new int[INTERVAL];
|
||||
this.docFreq = new int[INTERVAL];
|
||||
this.totalTermFreq = new long[INTERVAL];
|
||||
this.statsBlockOrd = -1;
|
||||
this.metaBlockOrd = -1;
|
||||
}
|
||||
|
||||
/** Decodes stats data into term state */
|
||||
void decodeStats() throws IOException {
|
||||
final int upto = (int)ord % INTERVAL;
|
||||
final int oldBlockOrd = statsBlockOrd;
|
||||
statsBlockOrd = (int)ord / INTERVAL;
|
||||
if (oldBlockOrd != statsBlockOrd) {
|
||||
refillStats();
|
||||
}
|
||||
state.docFreq = docFreq[upto];
|
||||
state.totalTermFreq = totalTermFreq[upto];
|
||||
}
|
||||
|
||||
/** Let PBF decode metadata */
|
||||
void decodeMetaData() throws IOException {
|
||||
final int upto = (int)ord % INTERVAL;
|
||||
final int oldBlockOrd = metaBlockOrd;
|
||||
metaBlockOrd = (int)ord / INTERVAL;
|
||||
if (metaBlockOrd != oldBlockOrd) {
|
||||
refillMetadata();
|
||||
}
|
||||
metaBytesReader.setPosition(bytesStart[upto]);
|
||||
postingsReader.decodeTerm(longs[upto], metaBytesReader, fieldInfo, state, true);
|
||||
}
|
||||
|
||||
/** Load current stats shard */
|
||||
final void refillStats() throws IOException {
|
||||
final int offset = statsBlockOrd * numSkipInfo;
|
||||
final int statsFP = (int)skipInfo[offset];
|
||||
statsReader.setPosition(statsFP);
|
||||
for (int i = 0; i < INTERVAL && !statsReader.eof(); i++) {
|
||||
int code = statsReader.readVInt();
|
||||
if (hasFreqs()) {
|
||||
docFreq[i] = (code >>> 1);
|
||||
if ((code & 1) == 1) {
|
||||
totalTermFreq[i] = docFreq[i];
|
||||
} else {
|
||||
totalTermFreq[i] = docFreq[i] + statsReader.readVLong();
|
||||
}
|
||||
} else {
|
||||
docFreq[i] = code;
|
||||
totalTermFreq[i] = code;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Load current metadata shard */
|
||||
final void refillMetadata() throws IOException {
|
||||
final int offset = metaBlockOrd * numSkipInfo;
|
||||
final int metaLongsFP = (int)skipInfo[offset + 1];
|
||||
final int metaBytesFP = (int)skipInfo[offset + 2];
|
||||
metaLongsReader.setPosition(metaLongsFP);
|
||||
for (int j = 0; j < longsSize; j++) {
|
||||
longs[0][j] = skipInfo[offset + 3 + j] + metaLongsReader.readVLong();
|
||||
}
|
||||
bytesStart[0] = metaBytesFP;
|
||||
bytesLength[0] = (int)metaLongsReader.readVLong();
|
||||
for (int i = 1; i < INTERVAL && !metaLongsReader.eof(); i++) {
|
||||
for (int j = 0; j < longsSize; j++) {
|
||||
longs[i][j] = longs[i-1][j] + metaLongsReader.readVLong();
|
||||
}
|
||||
bytesStart[i] = bytesStart[i-1] + bytesLength[i-1];
|
||||
bytesLength[i] = (int)metaLongsReader.readVLong();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermState termState() throws IOException {
|
||||
decodeMetaData();
|
||||
return state.clone();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docFreq() throws IOException {
|
||||
return state.docFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long totalTermFreq() throws IOException {
|
||||
return state.totalTermFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
|
||||
decodeMetaData();
|
||||
return postingsReader.postings(fieldInfo, state, reuse, flags);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ImpactsEnum impacts(int flags) throws IOException {
|
||||
decodeMetaData();
|
||||
return postingsReader.impacts(fieldInfo, state, flags);
|
||||
}
|
||||
|
||||
// TODO: this can be achieved by making use of Util.getByOutput()
|
||||
// and should have related tests
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ord() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
// Iterates through all terms in this field
|
||||
private final class SegmentTermsEnum extends BaseTermsEnum {
|
||||
final BytesRefFSTEnum<Long> fstEnum;
|
||||
/* Current term, null when enum ends or unpositioned */
|
||||
BytesRef term;
|
||||
|
||||
/* True when current term's metadata is decoded */
|
||||
boolean decoded;
|
||||
|
||||
/* True when current enum is 'positioned' by seekExact(TermState) */
|
||||
boolean seekPending;
|
||||
|
||||
SegmentTermsEnum() throws IOException {
|
||||
this.fstEnum = new BytesRefFSTEnum<>(index);
|
||||
this.decoded = false;
|
||||
this.seekPending = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() throws IOException {
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
void decodeMetaData() throws IOException {
|
||||
if (!decoded && !seekPending) {
|
||||
super.decodeMetaData();
|
||||
decoded = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Update current enum according to FSTEnum
|
||||
void updateEnum(final InputOutput<Long> pair) throws IOException {
|
||||
if (pair == null) {
|
||||
term = null;
|
||||
} else {
|
||||
term = pair.input;
|
||||
ord = pair.output;
|
||||
decodeStats();
|
||||
}
|
||||
decoded = false;
|
||||
seekPending = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (seekPending) { // previously positioned, but termOutputs not fetched
|
||||
seekPending = false;
|
||||
SeekStatus status = seekCeil(term);
|
||||
assert status == SeekStatus.FOUND; // must positioned on valid term
|
||||
}
|
||||
updateEnum(fstEnum.next());
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean seekExact(BytesRef target) throws IOException {
|
||||
updateEnum(fstEnum.seekExact(target));
|
||||
return term != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef target) throws IOException {
|
||||
updateEnum(fstEnum.seekCeil(target));
|
||||
if (term == null) {
|
||||
return SeekStatus.END;
|
||||
} else {
|
||||
return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(BytesRef target, TermState otherState) {
|
||||
if (!target.equals(term)) {
|
||||
state.copyFrom(otherState);
|
||||
term = BytesRef.deepCopyOf(target);
|
||||
seekPending = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Iterates intersect result with automaton (cannot seek!)
|
||||
private final class IntersectTermsEnum extends BaseTermsEnum {
|
||||
/* Current term, null when enum ends or unpositioned */
|
||||
BytesRefBuilder term;
|
||||
|
||||
/* True when current term's metadata is decoded */
|
||||
boolean decoded;
|
||||
|
||||
/* True when there is pending term when calling next() */
|
||||
boolean pending;
|
||||
|
||||
/* stack to record how current term is constructed,
|
||||
* used to accumulate metadata or rewind term:
|
||||
* level == term.length + 1,
|
||||
* == 0 when term is null */
|
||||
Frame[] stack;
|
||||
int level;
|
||||
|
||||
/* term dict fst */
|
||||
final FST<Long> fst;
|
||||
final FST.BytesReader fstReader;
|
||||
final Outputs<Long> fstOutputs;
|
||||
|
||||
/* query automaton to intersect with */
|
||||
final ByteRunAutomaton fsa;
|
||||
|
||||
private final class Frame {
|
||||
/* fst stats */
|
||||
FST.Arc<Long> arc;
|
||||
|
||||
Long output;
|
||||
|
||||
/* automaton stats */
|
||||
int state;
|
||||
|
||||
Frame() {
|
||||
this.arc = new FST.Arc<>();
|
||||
this.state = -1;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "arc=" + arc + " state=" + state;
|
||||
}
|
||||
}
|
||||
|
||||
IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
|
||||
//if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
|
||||
this.fst = index;
|
||||
this.fstReader = fst.getBytesReader();
|
||||
this.fstOutputs = index.outputs;
|
||||
this.fsa = compiled.runAutomaton;
|
||||
this.level = -1;
|
||||
this.stack = new Frame[16];
|
||||
for (int i = 0 ; i < stack.length; i++) {
|
||||
this.stack[i] = new Frame();
|
||||
}
|
||||
|
||||
Frame frame;
|
||||
frame = loadVirtualFrame(newFrame());
|
||||
this.level++;
|
||||
frame = loadFirstFrame(newFrame());
|
||||
pushFrame(frame);
|
||||
|
||||
this.decoded = false;
|
||||
this.pending = false;
|
||||
|
||||
if (startTerm == null) {
|
||||
pending = isAccept(topFrame());
|
||||
} else {
|
||||
doSeekCeil(startTerm);
|
||||
pending = (term == null || !startTerm.equals(term.get())) && isValid(topFrame()) && isAccept(topFrame());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() throws IOException {
|
||||
return term == null ? null : term.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
void decodeMetaData() throws IOException {
|
||||
if (!decoded) {
|
||||
super.decodeMetaData();
|
||||
decoded = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
void decodeStats() throws IOException {
|
||||
ord = topFrame().output;
|
||||
super.decodeStats();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef target) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
//if (TEST) System.out.println("Enum next()");
|
||||
if (pending) {
|
||||
pending = false;
|
||||
decodeStats();
|
||||
return term();
|
||||
}
|
||||
decoded = false;
|
||||
DFS:
|
||||
while (level > 0) {
|
||||
Frame frame = newFrame();
|
||||
if (loadExpandFrame(topFrame(), frame) != null) { // has valid target
|
||||
pushFrame(frame);
|
||||
if (isAccept(frame)) { // gotcha
|
||||
break;
|
||||
}
|
||||
continue; // check next target
|
||||
}
|
||||
frame = popFrame();
|
||||
while(level > 0) {
|
||||
if (loadNextFrame(topFrame(), frame) != null) { // has valid sibling
|
||||
pushFrame(frame);
|
||||
if (isAccept(frame)) { // gotcha
|
||||
break DFS;
|
||||
}
|
||||
continue DFS; // check next target
|
||||
}
|
||||
frame = popFrame();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
decodeStats();
|
||||
return term();
|
||||
}
|
||||
|
||||
BytesRef doSeekCeil(BytesRef target) throws IOException {
|
||||
//if (TEST) System.out.println("Enum doSeekCeil()");
|
||||
Frame frame= null;
|
||||
int label, upto = 0, limit = target.length;
|
||||
while (upto < limit) { // to target prefix, or ceil label (rewind prefix)
|
||||
frame = newFrame();
|
||||
label = target.bytes[upto] & 0xff;
|
||||
frame = loadCeilFrame(label, topFrame(), frame);
|
||||
if (frame == null || frame.arc.label() != label) {
|
||||
break;
|
||||
}
|
||||
assert isValid(frame); // target must be fetched from automaton
|
||||
pushFrame(frame);
|
||||
upto++;
|
||||
}
|
||||
if (upto == limit) { // got target
|
||||
return term();
|
||||
}
|
||||
if (frame != null) { // got larger term('s prefix)
|
||||
pushFrame(frame);
|
||||
return isAccept(frame) ? term() : next();
|
||||
}
|
||||
while (level > 0) { // got target's prefix, advance to larger term
|
||||
frame = popFrame();
|
||||
while (level > 0 && !canRewind(frame)) {
|
||||
frame = popFrame();
|
||||
}
|
||||
if (loadNextFrame(topFrame(), frame) != null) {
|
||||
pushFrame(frame);
|
||||
return isAccept(frame) ? term() : next();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Virtual frame, never pop */
|
||||
Frame loadVirtualFrame(Frame frame) {
|
||||
frame.output = fstOutputs.getNoOutput();
|
||||
frame.state = -1;
|
||||
return frame;
|
||||
}
|
||||
|
||||
/** Load frame for start arc(node) on fst */
|
||||
Frame loadFirstFrame(Frame frame) {
|
||||
frame.arc = fst.getFirstArc(frame.arc);
|
||||
frame.output = frame.arc.output();
|
||||
frame.state = 0;
|
||||
return frame;
|
||||
}
|
||||
|
||||
/** Load frame for target arc(node) on fst */
|
||||
Frame loadExpandFrame(Frame top, Frame frame) throws IOException {
|
||||
if (!canGrow(top)) {
|
||||
return null;
|
||||
}
|
||||
frame.arc = fst.readFirstRealTargetArc(top.arc.target(), frame.arc, fstReader);
|
||||
frame.state = fsa.step(top.state, frame.arc.label());
|
||||
frame.output = frame.arc.output();
|
||||
//if (TEST) System.out.println(" loadExpand frame="+frame);
|
||||
if (frame.state == -1) {
|
||||
return loadNextFrame(top, frame);
|
||||
}
|
||||
return frame;
|
||||
}
|
||||
|
||||
/** Load frame for sibling arc(node) on fst */
|
||||
Frame loadNextFrame(Frame top, Frame frame) throws IOException {
|
||||
if (!canRewind(frame)) {
|
||||
return null;
|
||||
}
|
||||
while (!frame.arc.isLast()) {
|
||||
frame.arc = fst.readNextRealArc(frame.arc, fstReader);
|
||||
frame.output = frame.arc.output();
|
||||
frame.state = fsa.step(top.state, frame.arc.label());
|
||||
if (frame.state != -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
//if (TEST) System.out.println(" loadNext frame="+frame);
|
||||
if (frame.state == -1) {
|
||||
return null;
|
||||
}
|
||||
return frame;
|
||||
}
|
||||
|
||||
/** Load frame for target arc(node) on fst, so that
|
||||
* arc.label >= label and !fsa.reject(arc.label) */
|
||||
Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
|
||||
FST.Arc<Long> arc = frame.arc;
|
||||
arc = Util.readCeilArc(label, fst, top.arc, arc, fstReader);
|
||||
if (arc == null) {
|
||||
return null;
|
||||
}
|
||||
frame.state = fsa.step(top.state, arc.label());
|
||||
//if (TEST) System.out.println(" loadCeil frame="+frame);
|
||||
if (frame.state == -1) {
|
||||
return loadNextFrame(top, frame);
|
||||
}
|
||||
frame.output = arc.output();
|
||||
return frame;
|
||||
}
|
||||
|
||||
boolean isAccept(Frame frame) { // reach a term both fst&fsa accepts
|
||||
return fsa.isAccept(frame.state) && frame.arc.isFinal();
|
||||
}
|
||||
boolean isValid(Frame frame) { // reach a prefix both fst&fsa won't reject
|
||||
return /*frame != null &&*/ frame.state != -1;
|
||||
}
|
||||
boolean canGrow(Frame frame) { // can walk forward on both fst&fsa
|
||||
return frame.state != -1 && FST.targetHasArcs(frame.arc);
|
||||
}
|
||||
boolean canRewind(Frame frame) { // can jump to sibling
|
||||
return !frame.arc.isLast();
|
||||
}
|
||||
|
||||
void pushFrame(Frame frame) {
|
||||
final FST.Arc<Long> arc = frame.arc;
|
||||
frame.output = fstOutputs.add(topFrame().output, frame.output);
|
||||
term = grow(arc.label());
|
||||
level++;
|
||||
assert frame == stack[level];
|
||||
}
|
||||
|
||||
Frame popFrame() {
|
||||
term = shrink();
|
||||
return stack[level--];
|
||||
}
|
||||
|
||||
Frame newFrame() {
|
||||
if (level+1 == stack.length) {
|
||||
final Frame[] temp = new Frame[ArrayUtil.oversize(level+2, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
|
||||
System.arraycopy(stack, 0, temp, 0, stack.length);
|
||||
for (int i = stack.length; i < temp.length; i++) {
|
||||
temp[i] = new Frame();
|
||||
}
|
||||
stack = temp;
|
||||
}
|
||||
return stack[level+1];
|
||||
}
|
||||
|
||||
Frame topFrame() {
|
||||
return stack[level];
|
||||
}
|
||||
|
||||
BytesRefBuilder grow(int label) {
|
||||
if (term == null) {
|
||||
term = new BytesRefBuilder();
|
||||
} else {
|
||||
term.append((byte) label);
|
||||
}
|
||||
return term;
|
||||
}
|
||||
|
||||
BytesRefBuilder shrink() {
|
||||
if (term.length() == 0) {
|
||||
term = null;
|
||||
} else {
|
||||
term.setLength(term.length() - 1);
|
||||
}
|
||||
return term;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static<T> void walk(FST<T> fst) throws IOException {
|
||||
final ArrayList<FST.Arc<T>> queue = new ArrayList<>();
|
||||
final BitSet seen = new BitSet();
|
||||
final FST.BytesReader reader = fst.getBytesReader();
|
||||
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
|
||||
queue.add(startArc);
|
||||
while (!queue.isEmpty()) {
|
||||
final FST.Arc<T> arc = queue.remove(0);
|
||||
final long node = arc.target();
|
||||
//System.out.println(arc);
|
||||
if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
|
||||
seen.set((int) node);
|
||||
fst.readFirstRealTargetArc(node, arc, reader);
|
||||
while (true) {
|
||||
queue.add(new FST.Arc<T>().copyFrom(arc));
|
||||
if (arc.isLast()) {
|
||||
break;
|
||||
} else {
|
||||
fst.readNextRealArc(arc, reader);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long ramBytesUsed = postingsReader.ramBytesUsed();
|
||||
for (TermsReader r : fields.values()) {
|
||||
ramBytesUsed += r.ramBytesUsed();
|
||||
}
|
||||
return ramBytesUsed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<Accountable> getChildResources() {
|
||||
List<Accountable> resources = new ArrayList<>(Accountables.namedAccountables("field", fields));
|
||||
resources.add(Accountables.namedAccountable("delegate", postingsReader));
|
||||
return Collections.unmodifiableList(resources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + "(fields=" + fields.size() + ",delegate=" + postingsReader + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
postingsReader.checkIntegrity();
|
||||
}
|
||||
}
|
|
@@ -1,386 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.memory;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.codecs.BlockTermState;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.NormsProducer;
|
||||
import org.apache.lucene.codecs.PostingsWriterBase;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.ByteBuffersDataOutput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.fst.FSTCompiler;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
/**
|
||||
* FST-based term dict, using ord as FST output.
|
||||
*
|
||||
* The FST holds the mapping between <term, ord>, and
|
||||
* term's metadata is delta encoded into a single byte block.
|
||||
*
|
||||
* Typically the byte block consists of four parts:
|
||||
* 1. term statistics: docFreq, totalTermFreq;
|
||||
* 2. monotonic long[], e.g. the pointer to the postings list for that term;
|
||||
* 3. generic byte[], e.g. other information customized by postings base.
|
||||
* 4. single-level skip list to speed up metadata decoding by ord.
|
||||
*
|
||||
* <p>
|
||||
* Files:
|
||||
* <ul>
|
||||
* <li><tt>.tix</tt>: <a href="#Termindex">Term Index</a></li>
|
||||
* <li><tt>.tbk</tt>: <a href="#Termblock">Term Block</a></li>
|
||||
* </ul>
|
||||
*
|
||||
* <a name="Termindex"></a>
|
||||
* <h3>Term Index</h3>
|
||||
* <p>
|
||||
* The .tix contains a list of FSTs, one for each field.
|
||||
* The FST maps a term to its corresponding order in the current field.
|
||||
* </p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>TermIndex(.tix) --> Header, TermFST<sup>NumFields</sup>, Footer</li>
|
||||
* <li>TermFST --> {@link FST FST<long>}</li>
|
||||
* <li>Header --> {@link CodecUtil#writeIndexHeader IndexHeader}</li>
|
||||
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>Notes:</p>
|
||||
* <ul>
|
||||
* <li>
|
||||
* Since terms are already sorted before writing to <a href="#Termblock">Term Block</a>,
|
||||
* their ords can be used directly to seek term metadata from the term block.
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
* <a name="Termblock"></a>
|
||||
* <h3>Term Block</h3>
|
||||
* <p>
|
||||
* The .tbk contains all the statistics and metadata for terms, along with field summary (e.g.
|
||||
* per-field data like the number of documents in the current field). For each field, there are four blocks:
|
||||
* <ul>
|
||||
* <li>statistics bytes block: contains term statistics; </li>
|
||||
* <li>metadata longs block: delta-encodes monotonic part of metadata; </li>
|
||||
* <li>metadata bytes block: encodes other parts of metadata; </li>
|
||||
* <li>skip block: contains skip data, to speed up metadata seeking and decoding</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>File Format:</p>
|
||||
* <ul>
|
||||
* <li>TermBlock(.tbk) --> Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
|
||||
* <li>FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
|
||||
* DocCount, LongsSize, DataBlock > <sup>NumFields</sup>, Footer</li>
|
||||
*
|
||||
* <li>DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
|
||||
* SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
|
||||
* <li>SkipBlock --> < StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta,
|
||||
* MetaLongsSkipDelta<sup>LongsSize</sup> ><sup>NumTerms</sup>
|
||||
* <li>StatsBlock --> < DocFreq[Same?], (TotalTermFreq-DocFreq) ? > <sup>NumTerms</sup>
|
||||
* <li>MetaLongsBlock --> < LongDelta<sup>LongsSize</sup>, BytesSize > <sup>NumTerms</sup>
|
||||
* <li>MetaBytesBlock --> Byte <sup>MetaBytesBlockLength</sup>
|
||||
* <li>Header --> {@link CodecUtil#writeIndexHeader IndexHeader}</li>
|
||||
* <li>DirOffset --> {@link DataOutput#writeLong Uint64}</li>
|
||||
* <li>NumFields, FieldNumber, DocCount, DocFreq, LongsSize,
|
||||
* FieldNumber, DocCount --> {@link DataOutput#writeVInt VInt}</li>
|
||||
* <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
|
||||
* StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
|
||||
* LongDelta --> {@link DataOutput#writeVLong VLong}</li>
|
||||
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
||||
* </ul>
|
||||
* <p>Notes: </p>
|
||||
* <ul>
|
||||
* <li>
|
||||
* The format of PostingsHeader and MetaBytes is customized by the specific postings implementation:
|
||||
* they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
|
||||
* (non-monotonic ones like pulsed postings data).
|
||||
* </li>
|
||||
* <li>
|
||||
* During initialization the reader will load all the blocks into memory. SkipBlock will be decoded, so that during seek
|
||||
* the term dictionary can look up file pointers directly. StatsFPDelta, MetaLongsSkipFPDelta, etc. are file offsets
|
||||
* for every SkipInterval's term. MetaLongsSkipDelta is the difference from the previous one, which indicates
|
||||
* the value of preceding metadata longs for every SkipInterval's term.
|
||||
* </li>
|
||||
* <li>
|
||||
* DocFreq is the count of documents which contain the term. TotalTermFreq is the total number of occurrences of the term.
|
||||
* Usually these two values are the same for long-tail terms, therefore one bit is stolen from DocFreq to check this case,
|
||||
* so that encoding of TotalTermFreq may be omitted.
|
||||
* </li>
|
||||
* </ul>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
||||
public class FSTOrdTermsWriter extends FieldsConsumer {
|
||||
static final String TERMS_INDEX_EXTENSION = "tix";
|
||||
static final String TERMS_BLOCK_EXTENSION = "tbk";
|
||||
static final String TERMS_CODEC_NAME = "FSTOrdTerms";
|
||||
static final String TERMS_INDEX_CODEC_NAME = "FSTOrdIndex";
|
||||
|
||||
public static final int VERSION_START = 2;
|
||||
public static final int VERSION_CURRENT = VERSION_START;
|
||||
public static final int SKIP_INTERVAL = 8;
|
||||
|
||||
final PostingsWriterBase postingsWriter;
|
||||
final FieldInfos fieldInfos;
|
||||
final int maxDoc;
|
||||
final List<FieldMetaData> fields = new ArrayList<>();
|
||||
IndexOutput blockOut = null;
|
||||
IndexOutput indexOut = null;
|
||||
|
||||
public FSTOrdTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
|
||||
final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
|
||||
final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_BLOCK_EXTENSION);
|
||||
|
||||
this.postingsWriter = postingsWriter;
|
||||
this.fieldInfos = state.fieldInfos;
|
||||
this.maxDoc = state.segmentInfo.maxDoc();
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
this.indexOut = state.directory.createOutput(termsIndexFileName, state.context);
|
||||
this.blockOut = state.directory.createOutput(termsBlockFileName, state.context);
|
||||
CodecUtil.writeIndexHeader(indexOut, TERMS_INDEX_CODEC_NAME, VERSION_CURRENT,
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
CodecUtil.writeIndexHeader(blockOut, TERMS_CODEC_NAME, VERSION_CURRENT,
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
this.postingsWriter.init(blockOut, state);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(indexOut, blockOut);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(Fields fields, NormsProducer norms) throws IOException {
|
||||
for(String field : fields) {
|
||||
Terms terms = fields.terms(field);
|
||||
if (terms == null) {
|
||||
continue;
|
||||
}
|
||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
||||
boolean hasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
TermsWriter termsWriter = new TermsWriter(fieldInfo);
|
||||
|
||||
long sumTotalTermFreq = 0;
|
||||
long sumDocFreq = 0;
|
||||
FixedBitSet docsSeen = new FixedBitSet(maxDoc);
|
||||
while (true) {
|
||||
BytesRef term = termsEnum.next();
|
||||
if (term == null) {
|
||||
break;
|
||||
}
|
||||
BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms);
|
||||
if (termState != null) {
|
||||
termsWriter.finishTerm(term, termState);
|
||||
sumTotalTermFreq += termState.totalTermFreq;
|
||||
sumDocFreq += termState.docFreq;
|
||||
}
|
||||
}
|
||||
|
||||
termsWriter.finish(hasFreq ? sumTotalTermFreq : -1, sumDocFreq, docsSeen.cardinality());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (blockOut != null) {
|
||||
boolean success = false;
|
||||
try {
|
||||
final long blockDirStart = blockOut.getFilePointer();
|
||||
|
||||
// write field summary
|
||||
blockOut.writeVInt(fields.size());
|
||||
for (FieldMetaData field : fields) {
|
||||
blockOut.writeVInt(field.fieldInfo.number);
|
||||
blockOut.writeVLong(field.numTerms);
|
||||
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
||||
blockOut.writeVLong(field.sumTotalTermFreq);
|
||||
}
|
||||
blockOut.writeVLong(field.sumDocFreq);
|
||||
blockOut.writeVInt(field.docCount);
|
||||
blockOut.writeVInt(field.longsSize);
|
||||
blockOut.writeVLong(field.statsOut.size());
|
||||
blockOut.writeVLong(field.metaLongsOut.size());
|
||||
blockOut.writeVLong(field.metaBytesOut.size());
|
||||
|
||||
field.skipOut.copyTo(blockOut);
|
||||
field.statsOut.copyTo(blockOut);
|
||||
field.metaLongsOut.copyTo(blockOut);
|
||||
field.metaBytesOut.copyTo(blockOut);
|
||||
field.dict.save(indexOut);
|
||||
}
|
||||
writeTrailer(blockOut, blockDirStart);
|
||||
CodecUtil.writeFooter(indexOut);
|
||||
CodecUtil.writeFooter(blockOut);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(blockOut, indexOut, postingsWriter);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(blockOut, indexOut, postingsWriter);
|
||||
}
|
||||
blockOut = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void writeTrailer(IndexOutput out, long dirStart) throws IOException {
|
||||
out.writeLong(dirStart);
|
||||
}
|
||||
|
||||
private static class FieldMetaData {
|
||||
public FieldInfo fieldInfo;
|
||||
public long numTerms;
|
||||
public long sumTotalTermFreq;
|
||||
public long sumDocFreq;
|
||||
public int docCount;
|
||||
public int longsSize;
|
||||
public FST<Long> dict;
|
||||
|
||||
// TODO: block encode each part
|
||||
|
||||
// vint encode next skip point (fully decoded when reading)
|
||||
public ByteBuffersDataOutput skipOut;
|
||||
// vint encode df, (ttf-df)
|
||||
public ByteBuffersDataOutput statsOut;
|
||||
// vint encode monotonic long[] and length for corresponding byte[]
|
||||
public ByteBuffersDataOutput metaLongsOut;
|
||||
// generic byte[]
|
||||
public ByteBuffersDataOutput metaBytesOut;
|
||||
}
|
||||
|
||||
final class TermsWriter {
|
||||
private final FSTCompiler<Long> fstCompiler;
|
||||
private final PositiveIntOutputs outputs;
|
||||
private final FieldInfo fieldInfo;
|
||||
private final int longsSize;
|
||||
private long numTerms;
|
||||
|
||||
private final IntsRefBuilder scratchTerm = new IntsRefBuilder();
|
||||
private final ByteBuffersDataOutput statsOut = new ByteBuffersDataOutput();
|
||||
private final ByteBuffersDataOutput metaLongsOut = new ByteBuffersDataOutput();
|
||||
private final ByteBuffersDataOutput metaBytesOut = new ByteBuffersDataOutput();
|
||||
private final ByteBuffersDataOutput skipOut = new ByteBuffersDataOutput();
|
||||
private long lastBlockStatsFP;
|
||||
private long lastBlockMetaLongsFP;
|
||||
private long lastBlockMetaBytesFP;
|
||||
private long[] lastBlockLongs;
|
||||
|
||||
private long[] lastLongs;
|
||||
private long lastMetaBytesFP;
|
||||
|
||||
TermsWriter(FieldInfo fieldInfo) {
|
||||
this.numTerms = 0;
|
||||
this.fieldInfo = fieldInfo;
|
||||
this.longsSize = postingsWriter.setField(fieldInfo);
|
||||
this.outputs = PositiveIntOutputs.getSingleton();
|
||||
this.fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
|
||||
this.lastBlockStatsFP = 0;
|
||||
this.lastBlockMetaLongsFP = 0;
|
||||
this.lastBlockMetaBytesFP = 0;
|
||||
this.lastBlockLongs = new long[longsSize];
|
||||
|
||||
this.lastLongs = new long[longsSize];
|
||||
this.lastMetaBytesFP = 0;
|
||||
}
|
||||
|
||||
public void finishTerm(BytesRef text, BlockTermState state) throws IOException {
|
||||
if (numTerms > 0 && numTerms % SKIP_INTERVAL == 0) {
|
||||
bufferSkip();
|
||||
}
|
||||
// write term meta data into fst
|
||||
final long longs[] = new long[longsSize];
|
||||
final long delta = state.totalTermFreq - state.docFreq;
|
||||
if (state.totalTermFreq > 0) {
|
||||
if (delta == 0) {
|
||||
statsOut.writeVInt(state.docFreq<<1|1);
|
||||
} else {
|
||||
statsOut.writeVInt(state.docFreq<<1);
|
||||
statsOut.writeVLong(state.totalTermFreq-state.docFreq);
|
||||
}
|
||||
} else {
|
||||
statsOut.writeVInt(state.docFreq);
|
||||
}
|
||||
postingsWriter.encodeTerm(longs, metaBytesOut, fieldInfo, state, true);
|
||||
for (int i = 0; i < longsSize; i++) {
|
||||
metaLongsOut.writeVLong(longs[i] - lastLongs[i]);
|
||||
lastLongs[i] = longs[i];
|
||||
}
|
||||
metaLongsOut.writeVLong(metaBytesOut.size() - lastMetaBytesFP);
|
||||
|
||||
fstCompiler.add(Util.toIntsRef(text, scratchTerm), numTerms);
|
||||
numTerms++;
|
||||
|
||||
lastMetaBytesFP = metaBytesOut.size();
|
||||
}
|
||||
|
||||
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
|
||||
if (numTerms > 0) {
|
||||
final FieldMetaData metadata = new FieldMetaData();
|
||||
metadata.fieldInfo = fieldInfo;
|
||||
metadata.numTerms = numTerms;
|
||||
metadata.sumTotalTermFreq = sumTotalTermFreq;
|
||||
metadata.sumDocFreq = sumDocFreq;
|
||||
metadata.docCount = docCount;
|
||||
metadata.longsSize = longsSize;
|
||||
metadata.skipOut = skipOut;
|
||||
metadata.statsOut = statsOut;
|
||||
metadata.metaLongsOut = metaLongsOut;
|
||||
metadata.metaBytesOut = metaBytesOut;
|
||||
metadata.dict = fstCompiler.compile();
|
||||
fields.add(metadata);
|
||||
}
|
||||
}
|
||||
|
||||
private void bufferSkip() throws IOException {
|
||||
skipOut.writeVLong(statsOut.size() - lastBlockStatsFP);
|
||||
skipOut.writeVLong(metaLongsOut.size() - lastBlockMetaLongsFP);
|
||||
skipOut.writeVLong(metaBytesOut.size() - lastBlockMetaBytesFP);
|
||||
for (int i = 0; i < longsSize; i++) {
|
||||
skipOut.writeVLong(lastLongs[i] - lastBlockLongs[i]);
|
||||
}
|
||||
lastBlockStatsFP = statsOut.size();
|
||||
lastBlockMetaLongsFP = metaLongsOut.size();
|
||||
lastBlockMetaBytesFP = metaBytesOut.size();
|
||||
System.arraycopy(lastLongs, 0, lastBlockLongs, 0, longsSize);
|
||||
}
|
||||
}
|
||||
}
|
|
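The Notes section of the removed format's Javadoc above describes stealing DocFreq's low bit to signal TotalTermFreq == DocFreq, and finishTerm implements that inline in its statsOut writes. Below is a minimal standalone sketch of that stats encoding, assuming only Lucene's DataOutput/DataInput; the StatsCodecSketch class and its method names are invented for illustration and are not part of this change.

import java.io.IOException;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

// Illustrative sketch of the FSTOrd stats block encoding: the low bit of the
// DocFreq VInt marks the common case TotalTermFreq == DocFreq so the extra
// VLong delta can be omitted. Class and method names are made up.
final class StatsCodecSketch {

  static void writeStats(DataOutput statsOut, int docFreq, long totalTermFreq) throws IOException {
    if (totalTermFreq > 0) {                        // field indexes frequencies
      if (totalTermFreq == docFreq) {
        statsOut.writeVInt(docFreq << 1 | 1);       // low bit set: ttf == df
      } else {
        statsOut.writeVInt(docFreq << 1);           // low bit clear: delta follows
        statsOut.writeVLong(totalTermFreq - docFreq);
      }
    } else {
      statsOut.writeVInt(docFreq);                  // DOCS-only field: no ttf written
    }
  }

  // Returns {docFreq, totalTermFreq}; totalTermFreq is -1 for DOCS-only fields.
  static long[] readStats(DataInput statsIn, boolean hasFreq) throws IOException {
    if (hasFreq == false) {
      return new long[] {statsIn.readVInt(), -1};
    }
    int code = statsIn.readVInt();
    int docFreq = code >>> 1;
    long totalTermFreq = (code & 1) != 0 ? docFreq : docFreq + statsIn.readVLong();
    return new long[] {docFreq, totalTermFreq};
  }
}

Written this way, a long tail of singleton terms costs one VInt per term instead of a VInt plus a VLong.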
@ -41,7 +41,6 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
private final static TermData NO_OUTPUT = new TermData();
|
||||
//private static boolean TEST = false;
|
||||
private final boolean hasPos;
|
||||
private final int longsSize;
|
||||
|
||||
/**
|
||||
* Represents the metadata for one term.
|
||||
|
@ -50,18 +49,15 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
*/
|
||||
static class TermData implements Accountable {
|
||||
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermData.class);
|
||||
long[] longs;
|
||||
byte[] bytes;
|
||||
int docFreq;
|
||||
long totalTermFreq;
|
||||
TermData() {
|
||||
this.longs = null;
|
||||
this.bytes = null;
|
||||
this.docFreq = 0;
|
||||
this.totalTermFreq = -1;
|
||||
}
|
||||
TermData(long[] longs, byte[] bytes, int docFreq, long totalTermFreq) {
|
||||
this.longs = longs;
|
||||
TermData(byte[] bytes, int docFreq, long totalTermFreq) {
|
||||
this.bytes = bytes;
|
||||
this.docFreq = docFreq;
|
||||
this.totalTermFreq = totalTermFreq;
|
||||
|
@ -70,9 +66,6 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
@Override
|
||||
public long ramBytesUsed() {
|
||||
long ramBytesUsed = BASE_RAM_BYTES_USED;
|
||||
if (longs != null) {
|
||||
ramBytesUsed += RamUsageEstimator.sizeOf(longs);
|
||||
}
|
||||
if (bytes != null) {
|
||||
ramBytesUsed += RamUsageEstimator.sizeOf(bytes);
|
||||
}
|
||||
|
@ -85,14 +78,7 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
@Override
|
||||
public int hashCode() {
|
||||
int hash = 0;
|
||||
if (longs != null) {
|
||||
final int end = longs.length;
|
||||
for (int i = 0; i < end; i++) {
|
||||
hash -= longs[i];
|
||||
}
|
||||
}
|
||||
if (bytes != null) {
|
||||
hash = -hash;
|
||||
final int end = bytes.length;
|
||||
for (int i = 0; i < end; i++) {
|
||||
hash += bytes[i];
|
||||
|
@ -104,7 +90,7 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "FSTTermOutputs$TermData longs=" + Arrays.toString(longs) + " bytes=" + Arrays.toString(bytes) + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq;
|
||||
return "FSTTermOutputs$TermData bytes=" + Arrays.toString(bytes) + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -116,15 +102,13 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
}
|
||||
TermData other = (TermData) other_;
|
||||
return statsEqual(this, other) &&
|
||||
longsEqual(this, other) &&
|
||||
bytesEqual(this, other);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
protected FSTTermOutputs(FieldInfo fieldInfo, int longsSize) {
|
||||
protected FSTTermOutputs(FieldInfo fieldInfo) {
|
||||
this.hasPos = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
|
||||
this.longsSize = longsSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -145,37 +129,13 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
//if (TEST) System.out.println("ret:"+NO_OUTPUT);
|
||||
return NO_OUTPUT;
|
||||
}
|
||||
assert t1.longs.length == t2.longs.length;
|
||||
|
||||
long[] min = t1.longs, max = t2.longs;
|
||||
int pos = 0;
|
||||
TermData ret;
|
||||
|
||||
while (pos < longsSize && min[pos] == max[pos]) {
|
||||
pos++;
|
||||
}
|
||||
if (pos < longsSize) { // unequal long[]
|
||||
if (min[pos] > max[pos]) {
|
||||
min = t2.longs;
|
||||
max = t1.longs;
|
||||
}
|
||||
// check whether strictly smaller
|
||||
while (pos < longsSize && min[pos] <= max[pos]) {
|
||||
pos++;
|
||||
}
|
||||
if (pos < longsSize || allZero(min)) { // not comparable or all-zero
|
||||
ret = NO_OUTPUT;
|
||||
} else {
|
||||
ret = new TermData(min, null, 0, -1);
|
||||
}
|
||||
} else { // equal long[]
|
||||
if (statsEqual(t1, t2) && bytesEqual(t1, t2)) {
|
||||
ret = t1;
|
||||
} else if (allZero(min)) {
|
||||
ret = NO_OUTPUT;
|
||||
} else {
|
||||
ret = new TermData(min, null, 0, -1);
|
||||
}
|
||||
if (statsEqual(t1, t2) && bytesEqual(t1, t2)) {
|
||||
ret = t1;
|
||||
} else {
|
||||
ret = NO_OUTPUT;
|
||||
}
|
||||
//if (TEST) System.out.println("ret:"+ret);
|
||||
return ret;
|
||||
|
@ -188,23 +148,12 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
//if (TEST) System.out.println("ret:"+t1);
|
||||
return t1;
|
||||
}
|
||||
assert t1.longs.length == t2.longs.length;
|
||||
|
||||
int pos = 0;
|
||||
long diff = 0;
|
||||
long[] share = new long[longsSize];
|
||||
|
||||
while (pos < longsSize) {
|
||||
share[pos] = t1.longs[pos] - t2.longs[pos];
|
||||
diff += share[pos];
|
||||
pos++;
|
||||
}
|
||||
|
||||
TermData ret;
|
||||
if (diff == 0 && statsEqual(t1, t2) && bytesEqual(t1, t2)) {
|
||||
if (statsEqual(t1, t2) && bytesEqual(t1, t2)) {
|
||||
ret = NO_OUTPUT;
|
||||
} else {
|
||||
ret = new TermData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
|
||||
ret = new TermData(t1.bytes, t1.docFreq, t1.totalTermFreq);
|
||||
}
|
||||
//if (TEST) System.out.println("ret:"+ret);
|
||||
return ret;
|
||||
|
@ -223,21 +172,12 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
//if (TEST) System.out.println("ret:"+t1);
|
||||
return t1;
|
||||
}
|
||||
assert t1.longs.length == t2.longs.length;
|
||||
|
||||
int pos = 0;
|
||||
long[] accum = new long[longsSize];
|
||||
|
||||
while (pos < longsSize) {
|
||||
accum[pos] = t1.longs[pos] + t2.longs[pos];
|
||||
pos++;
|
||||
}
|
||||
|
||||
TermData ret;
|
||||
if (t2.bytes != null || t2.docFreq > 0) {
|
||||
ret = new TermData(accum, t2.bytes, t2.docFreq, t2.totalTermFreq);
|
||||
ret = new TermData(t2.bytes, t2.docFreq, t2.totalTermFreq);
|
||||
} else {
|
||||
ret = new TermData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
|
||||
ret = new TermData(t1.bytes, t1.docFreq, t1.totalTermFreq);
|
||||
}
|
||||
//if (TEST) System.out.println("ret:"+ret);
|
||||
return ret;
|
||||
|
@ -246,13 +186,12 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
@Override
|
||||
public void write(TermData data, DataOutput out) throws IOException {
|
||||
assert hasPos || data.totalTermFreq == -1;
|
||||
int bit0 = allZero(data.longs) ? 0 : 1;
|
||||
int bit1 = ((data.bytes == null || data.bytes.length == 0) ? 0 : 1) << 1;
|
||||
int bit2 = ((data.docFreq == 0) ? 0 : 1) << 2;
|
||||
int bits = bit0 | bit1 | bit2;
|
||||
if (bit1 > 0) { // determine extra length
|
||||
int bit0 = ((data.bytes == null || data.bytes.length == 0) ? 0 : 1);
|
||||
int bit1 = ((data.docFreq == 0) ? 0 : 1) << 1;
|
||||
int bits = bit0 | bit1;
|
||||
if (bit0 > 0) { // determine extra length
|
||||
if (data.bytes.length < 32) {
|
||||
bits |= (data.bytes.length << 3);
|
||||
bits |= (data.bytes.length << 2);
|
||||
out.writeByte((byte)bits);
|
||||
} else {
|
||||
out.writeByte((byte)bits);
|
||||
|
@ -261,15 +200,10 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
} else {
|
||||
out.writeByte((byte)bits);
|
||||
}
|
||||
if (bit0 > 0) { // not all-zero case
|
||||
for (int pos = 0; pos < longsSize; pos++) {
|
||||
out.writeVLong(data.longs[pos]);
|
||||
}
|
||||
}
|
||||
if (bit1 > 0) { // bytes exists
|
||||
if (bit0 > 0) { // bytes exists
|
||||
out.writeBytes(data.bytes, 0, data.bytes.length);
|
||||
}
|
||||
if (bit2 > 0) { // stats exist
|
||||
if (bit1 > 0) { // stats exist
|
||||
if (hasPos) {
|
||||
if (data.docFreq == data.totalTermFreq) {
|
||||
out.writeVInt((data.docFreq << 1) | 1);
|
||||
|
@ -285,28 +219,21 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
|
||||
@Override
|
||||
public TermData read(DataInput in) throws IOException {
|
||||
long[] longs = new long[longsSize];
|
||||
byte[] bytes = null;
|
||||
int docFreq = 0;
|
||||
long totalTermFreq = -1;
|
||||
int bits = in.readByte() & 0xff;
|
||||
int bit0 = bits & 1;
|
||||
int bit1 = bits & 2;
|
||||
int bit2 = bits & 4;
|
||||
int bytesSize = (bits >>> 3);
|
||||
if (bit1 > 0 && bytesSize == 0) { // determine extra length
|
||||
int bytesSize = (bits >>> 2);
|
||||
if (bit0 > 0 && bytesSize == 0) { // determine extra length
|
||||
bytesSize = in.readVInt();
|
||||
}
|
||||
if (bit0 > 0) { // not all-zero case
|
||||
for (int pos = 0; pos < longsSize; pos++) {
|
||||
longs[pos] = in.readVLong();
|
||||
}
|
||||
}
|
||||
if (bit1 > 0) { // bytes exists
|
||||
if (bit0 > 0) { // bytes exists
|
||||
bytes = new byte[bytesSize];
|
||||
in.readBytes(bytes, 0, bytesSize);
|
||||
}
|
||||
if (bit2 > 0) { // stats exist
|
||||
if (bit1 > 0) { // stats exist
|
||||
int code = in.readVInt();
|
||||
if (hasPos) {
|
||||
totalTermFreq = docFreq = code >>> 1;
|
||||
|
@ -317,7 +244,7 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
docFreq = code;
|
||||
}
|
||||
}
|
||||
return new TermData(longs, bytes, docFreq, totalTermFreq);
|
||||
return new TermData(bytes, docFreq, totalTermFreq);
|
||||
}
|
||||
|
||||
|
||||
|
@ -326,20 +253,14 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
int bits = in.readByte() & 0xff;
|
||||
int bit0 = bits & 1;
|
||||
int bit1 = bits & 2;
|
||||
int bit2 = bits & 4;
|
||||
int bytesSize = (bits >>> 3);
|
||||
if (bit1 > 0 && bytesSize == 0) { // determine extra length
|
||||
int bytesSize = (bits >>> 2);
|
||||
if (bit0 > 0 && bytesSize == 0) { // determine extra length
|
||||
bytesSize = in.readVInt();
|
||||
}
|
||||
if (bit0 > 0) { // not all-zero case
|
||||
for (int pos = 0; pos < longsSize; pos++) {
|
||||
in.readVLong();
|
||||
}
|
||||
}
|
||||
if (bit1 > 0) { // bytes exists
|
||||
if (bit0 > 0) { // bytes exists
|
||||
in.skipBytes(bytesSize);
|
||||
}
|
||||
if (bit2 > 0) { // stats exist
|
||||
if (bit1 > 0) { // stats exist
|
||||
int code = in.readVInt();
|
||||
if (hasPos && (code & 1) == 0) {
|
||||
in.readVLong();
|
||||
|
@ -366,18 +287,4 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
|
|||
}
|
||||
return t1.bytes != null && t2.bytes != null && Arrays.equals(t1.bytes, t2.bytes);
|
||||
}
|
||||
static boolean longsEqual(final TermData t1, final TermData t2) {
|
||||
if (t1.longs == null && t2.longs == null) {
|
||||
return true;
|
||||
}
|
||||
return t1.longs != null && t2.longs != null && Arrays.equals(t1.longs, t2.longs);
|
||||
}
|
||||
static boolean allZero(final long[] l) {
|
||||
for (int i = 0; i < l.length; i++) {
|
||||
if (l[i] != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
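With the long[] gone, the TermData header byte written by FSTTermOutputs#write shrinks from three flags to two, and the inlined bytes length moves down to bits 2..7. The following is a sketch of that layout as reconstructed from the hunks above; the helper class is illustrative only, and the length-overflow branch (writing a VInt when the length does not fit below 32) is an assumption inferred from the matching read path.

import java.io.IOException;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

// Assumed layout of the new TermData header byte:
//   bit 0     -> generic metadata bytes follow
//   bit 1     -> term stats (docFreq, plus totalTermFreq when positions are indexed) follow
//   bits 2..7 -> bytes length when it fits (< 32); 0 means a VInt length follows instead
final class TermDataHeaderSketch {

  static void writeHeader(DataOutput out, byte[] bytes, int docFreq) throws IOException {
    int bit0 = (bytes == null || bytes.length == 0) ? 0 : 1;
    int bit1 = (docFreq == 0 ? 0 : 1) << 1;
    int bits = bit0 | bit1;
    if (bit0 > 0 && bytes.length < 32) {
      out.writeByte((byte) (bits | (bytes.length << 2)));  // length inlined in the header
    } else {
      out.writeByte((byte) bits);
      if (bit0 > 0) {
        out.writeVInt(bytes.length);                       // assumed overflow branch
      }
    }
  }

  // Returns the metadata bytes length, or 0 when no metadata bytes are present.
  static int readHeaderBytesLength(DataInput in) throws IOException {
    int bits = in.readByte() & 0xff;
    int bit0 = bits & 1;
    int bytesSize = bits >>> 2;
    if (bit0 > 0 && bytesSize == 0) {
      bytesSize = in.readVInt();
    }
    return bit0 > 0 ? bytesSize : 0;
  }
}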
@ -99,8 +99,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
// if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
|
||||
long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
|
||||
int docCount = in.readVInt();
|
||||
int longsSize = in.readVInt();
|
||||
TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
|
||||
TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
|
||||
TermsReader previous = fields.put(fieldInfo.name, current);
|
||||
checkFieldSummary(state.segmentInfo, in, current, previous);
|
||||
}
|
||||
|
@ -169,17 +168,15 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
final long sumTotalTermFreq;
|
||||
final long sumDocFreq;
|
||||
final int docCount;
|
||||
final int longsSize;
|
||||
final FST<FSTTermOutputs.TermData> dict;
|
||||
|
||||
TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
|
||||
TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
|
||||
this.fieldInfo = fieldInfo;
|
||||
this.numTerms = numTerms;
|
||||
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||
this.sumDocFreq = sumDocFreq;
|
||||
this.docCount = docCount;
|
||||
this.longsSize = longsSize;
|
||||
this.dict = new FST<>(in, new FSTTermOutputs(fieldInfo, longsSize));
|
||||
this.dict = new FST<>(in, new FSTTermOutputs(fieldInfo));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -349,7 +346,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
if (meta.bytes != null) {
|
||||
bytesReader.reset(meta.bytes, 0, meta.bytes.length);
|
||||
}
|
||||
postingsReader.decodeTerm(meta.longs, bytesReader, fieldInfo, state, true);
|
||||
postingsReader.decodeTerm(bytesReader, fieldInfo, state, true);
|
||||
decoded = true;
|
||||
}
|
||||
}
|
||||
|
@ -495,7 +492,7 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
if (meta.bytes != null) {
|
||||
bytesReader.reset(meta.bytes, 0, meta.bytes.length);
|
||||
}
|
||||
postingsReader.decodeTerm(meta.longs, bytesReader, fieldInfo, state, true);
|
||||
postingsReader.decodeTerm(bytesReader, fieldInfo, state, true);
|
||||
decoded = true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -209,7 +209,6 @@ public class FSTTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
out.writeVLong(field.sumDocFreq);
|
||||
out.writeVInt(field.docCount);
|
||||
out.writeVInt(field.longsSize);
|
||||
field.dict.save(out);
|
||||
}
|
||||
writeTrailer(out, dirStart);
|
||||
|
@ -232,16 +231,14 @@ public class FSTTermsWriter extends FieldsConsumer {
|
|||
public final long sumTotalTermFreq;
|
||||
public final long sumDocFreq;
|
||||
public final int docCount;
|
||||
public final int longsSize;
|
||||
public final FST<FSTTermOutputs.TermData> dict;
|
||||
|
||||
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<FSTTermOutputs.TermData> fst) {
|
||||
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, FST<FSTTermOutputs.TermData> fst) {
|
||||
this.fieldInfo = fieldInfo;
|
||||
this.numTerms = numTerms;
|
||||
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||
this.sumDocFreq = sumDocFreq;
|
||||
this.docCount = docCount;
|
||||
this.longsSize = longsSize;
|
||||
this.dict = fst;
|
||||
}
|
||||
}
|
||||
|
@ -250,7 +247,6 @@ public class FSTTermsWriter extends FieldsConsumer {
|
|||
private final FSTCompiler<FSTTermOutputs.TermData> fstCompiler;
|
||||
private final FSTTermOutputs outputs;
|
||||
private final FieldInfo fieldInfo;
|
||||
private final int longsSize;
|
||||
private long numTerms;
|
||||
|
||||
private final IntsRefBuilder scratchTerm = new IntsRefBuilder();
|
||||
|
@ -259,19 +255,18 @@ public class FSTTermsWriter extends FieldsConsumer {
|
|||
TermsWriter(FieldInfo fieldInfo) {
|
||||
this.numTerms = 0;
|
||||
this.fieldInfo = fieldInfo;
|
||||
this.longsSize = postingsWriter.setField(fieldInfo);
|
||||
this.outputs = new FSTTermOutputs(fieldInfo, longsSize);
|
||||
postingsWriter.setField(fieldInfo);
|
||||
this.outputs = new FSTTermOutputs(fieldInfo);
|
||||
this.fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
}
|
||||
|
||||
public void finishTerm(BytesRef text, BlockTermState state) throws IOException {
|
||||
// write term meta data into fst
|
||||
final FSTTermOutputs.TermData meta = new FSTTermOutputs.TermData();
|
||||
meta.longs = new long[longsSize];
|
||||
meta.bytes = null;
|
||||
meta.docFreq = state.docFreq;
|
||||
meta.totalTermFreq = state.totalTermFreq;
|
||||
postingsWriter.encodeTerm(meta.longs, metaWriter, fieldInfo, state, true);
|
||||
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, true);
|
||||
if (metaWriter.size() > 0) {
|
||||
meta.bytes = metaWriter.toArrayCopy();
|
||||
metaWriter.reset();
|
||||
|
@ -284,7 +279,7 @@ public class FSTTermsWriter extends FieldsConsumer {
|
|||
// save FST dict
|
||||
if (numTerms > 0) {
|
||||
final FST<FSTTermOutputs.TermData> fst = fstCompiler.compile();
|
||||
fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst));
|
||||
fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, fst));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,7 +94,7 @@ public class DeltaBaseTermStateSerializer implements Accountable {
|
|||
/**
|
||||
* Writes a {@link BlockTermState} to the provided {@link DataOutput}.
|
||||
* <p>
|
||||
* Simpler variant of {@link Lucene84PostingsWriter#encodeTerm(long[], DataOutput, FieldInfo, BlockTermState, boolean)}.
|
||||
* Simpler variant of {@link Lucene84PostingsWriter#encodeTerm(DataOutput, FieldInfo, BlockTermState, boolean)}.
|
||||
*/
|
||||
public void writeTermState(DataOutput termStatesOutput, FieldInfo fieldInfo, BlockTermState termState) throws IOException {
|
||||
IndexOptions indexOptions = fieldInfo.getIndexOptions();
|
||||
|
@ -143,7 +143,7 @@ public class DeltaBaseTermStateSerializer implements Accountable {
|
|||
/**
|
||||
* Reads a {@link BlockTermState} from the provided {@link DataInput}.
|
||||
* <p>
|
||||
* Simpler variant of {@link Lucene84PostingsReader#decodeTerm(long[], DataInput, FieldInfo, BlockTermState, boolean)}.
|
||||
* Simpler variant of {@link Lucene84PostingsReader#decodeTerm(DataInput, FieldInfo, BlockTermState, boolean)}.
|
||||
*
|
||||
* @param reuse {@link BlockTermState} to reuse; or null to create a new one.
|
||||
*/
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat
|
||||
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
|
||||
org.apache.lucene.codecs.memory.DirectPostingsFormat
|
||||
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
|
||||
org.apache.lucene.codecs.memory.FSTPostingsFormat
|
||||
org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat
|
||||
org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPostingsFormat
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.memory;
|
||||
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.BasePostingsFormatTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
/**
|
||||
* Tests FSTOrdPostingsFormat
|
||||
*/
|
||||
public class TestFSTOrdPostingsFormat extends BasePostingsFormatTestCase {
|
||||
private final Codec codec = TestUtil.alwaysPostingsFormat(new FSTOrdPostingsFormat());
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
return codec;
|
||||
}
|
||||
}
|
|
@ -159,7 +159,7 @@ public class TestTermBytesComparator extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) {
|
||||
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) {
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -268,7 +268,7 @@ public class STBlockReaderTest extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) {
|
||||
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) {
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -61,7 +61,7 @@ public abstract class PostingsReaderBase implements Closeable, Accountable {
|
|||
/** Actually decode metadata for next term
|
||||
* @see PostingsWriterBase#encodeTerm
|
||||
*/
|
||||
public abstract void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
|
||||
public abstract void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
|
||||
|
||||
/** Must fully consume state, since after this call that
|
||||
* TermState may be reused. */
|
||||
|
|
|
@ -68,21 +68,12 @@ public abstract class PostingsWriterBase implements Closeable {
|
|||
* Usually elements in {@code longs} are file pointers, so each one always
|
||||
* increases when a new term is consumed. {@code out} is used to write generic
|
||||
* bytes, which are not monotonic.
|
||||
*
|
||||
* NOTE: sometimes long[] might contain "don't care" values that are unused, e.g.
|
||||
* the pointer to postings list may not be defined for some terms but is defined
|
||||
* for others, if it is designed to inline some postings data in term dictionary.
|
||||
* In this case, the postings writer should always use the last value, so that each
|
||||
* element in metadata long[] remains monotonic.
|
||||
*/
|
||||
public abstract void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
|
||||
public abstract void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
|
||||
|
||||
/**
|
||||
* Sets the current field for writing, and returns the
|
||||
* fixed length of long[] metadata (which is fixed per
|
||||
* field), called when the writing switches to another field. */
|
||||
// TODO: better name?
|
||||
public abstract int setField(FieldInfo fieldInfo);
|
||||
* Sets the current field for writing. */
|
||||
public abstract void setField(FieldInfo fieldInfo);
|
||||
|
||||
@Override
|
||||
public abstract void close() throws IOException;
|
||||
|
|
|
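The two hunks above are the heart of LUCENE-9116: encodeTerm and decodeTerm lose the long[] parameter, and setField no longer reports a long[] length. Below is a stripped-down sketch of what an implementation looks like under the new signatures; it deliberately does not extend PostingsWriterBase (that would require the full writer contract), and FooTermState, lastDocStartFP and the class name are invented for the example.

import java.io.IOException;

import org.apache.lucene.store.DataOutput;

// Sketch only: the shape of a term-metadata encoder under the new API.
final class FooMetadataEncoderSketch {

  static final class FooTermState {
    long docStartFP;          // file pointer of this term's postings
  }

  private long lastDocStartFP;

  void setField() {
    // The old setField returned the fixed long[] length (e.g. 1 for a doc FP only);
    // now it only resets per-field state.
    lastDocStartFP = 0;
  }

  void encodeTerm(DataOutput out, FooTermState state, boolean absolute) throws IOException {
    if (absolute) {
      lastDocStartFP = 0;     // first term of a block is encoded against zero
    }
    // Old API: longs[0] = state.docStartFP - lastDocStartFP, buffered by the caller.
    // New API: write the delta straight to the metadata output.
    out.writeVLong(state.docStartFP - lastDocStartFP);
    lastDocStartFP = state.docStartFP;
  }
}

The matching decode side simply does docStartFP += in.readVLong() under the same absolute handling, which is what the Lucene84 reader hunk further down shows.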
@ -87,7 +87,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
|
|||
* fixed length of long[] metadata (which is fixed per
|
||||
* field), called when the writing switches to another field. */
|
||||
@Override
|
||||
public int setField(FieldInfo fieldInfo) {
|
||||
public void setField(FieldInfo fieldInfo) {
|
||||
this.fieldInfo = fieldInfo;
|
||||
indexOptions = fieldInfo.getIndexOptions();
|
||||
|
||||
|
@ -113,8 +113,6 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
|
|||
enumFlags = PostingsEnum.OFFSETS;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -128,8 +128,11 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||
/** Auto-prefix terms have been superseded by points. */
|
||||
public static final int VERSION_AUTO_PREFIX_TERMS_REMOVED = 3;
|
||||
|
||||
/** The long[] + byte[] metadata has been replaced with a single byte[]. */
|
||||
public static final int VERSION_META_LONGS_REMOVED = 4;
|
||||
|
||||
/** Current terms format. */
|
||||
public static final int VERSION_CURRENT = VERSION_AUTO_PREFIX_TERMS_REMOVED;
|
||||
public static final int VERSION_CURRENT = VERSION_META_LONGS_REMOVED;
|
||||
|
||||
/** Extension of terms index file */
|
||||
static final String TERMS_INDEX_EXTENSION = "tip";
|
||||
|
@ -212,9 +215,11 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||
// when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written.
|
||||
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : termsIn.readVLong();
|
||||
final int docCount = termsIn.readVInt();
|
||||
final int longsSize = termsIn.readVInt();
|
||||
if (longsSize < 0) {
|
||||
throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn);
|
||||
if (version < VERSION_META_LONGS_REMOVED) {
|
||||
final int longsSize = termsIn.readVInt();
|
||||
if (longsSize < 0) {
|
||||
throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn);
|
||||
}
|
||||
}
|
||||
BytesRef minTerm = readBytesRef(termsIn);
|
||||
BytesRef maxTerm = readBytesRef(termsIn);
|
||||
|
@ -231,7 +236,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
|||
final long indexStartFP = indexIn.readVLong();
|
||||
FieldReader previous = fieldMap.put(fieldInfo.name,
|
||||
new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
|
||||
indexStartFP, longsSize, indexIn, minTerm, maxTerm, state.openedFromWriter, perFieldLoadMode));
|
||||
indexStartFP, indexIn, minTerm, maxTerm, state.openedFromWriter, perFieldLoadMode));
|
||||
if (previous != null) {
|
||||
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
|
||||
}
|
||||
|
|
|
@ -224,11 +224,10 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
public final long sumTotalTermFreq;
|
||||
public final long sumDocFreq;
|
||||
public final int docCount;
|
||||
private final int longsSize;
|
||||
public final BytesRef minTerm;
|
||||
public final BytesRef maxTerm;
|
||||
|
||||
public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize,
|
||||
public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount,
|
||||
BytesRef minTerm, BytesRef maxTerm) {
|
||||
assert numTerms > 0;
|
||||
this.fieldInfo = fieldInfo;
|
||||
|
@ -239,7 +238,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||
this.sumDocFreq = sumDocFreq;
|
||||
this.docCount = docCount;
|
||||
this.longsSize = longsSize;
|
||||
this.minTerm = minTerm;
|
||||
this.maxTerm = maxTerm;
|
||||
}
|
||||
|
@ -509,7 +507,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
class TermsWriter {
|
||||
private final FieldInfo fieldInfo;
|
||||
private final int longsSize;
|
||||
private long numTerms;
|
||||
final FixedBitSet docsSeen;
|
||||
long sumTotalTermFreq;
|
||||
|
@ -524,8 +521,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
private final BytesRefBuilder lastTerm = new BytesRefBuilder();
|
||||
private int[] prefixStarts = new int[8];
|
||||
|
||||
private final long[] longs;
|
||||
|
||||
// Pending stack of terms and blocks. As terms arrive (in sorted order)
|
||||
// we append to this stack, and once the top of the stack has enough
|
||||
// terms starting with a common prefix, we write a new block with
|
||||
|
@ -720,13 +715,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
|
||||
// Write term meta data
|
||||
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
|
||||
for (int pos = 0; pos < longsSize; pos++) {
|
||||
assert longs[pos] >= 0;
|
||||
metaWriter.writeVLong(longs[pos]);
|
||||
}
|
||||
bytesWriter.copyTo(metaWriter);
|
||||
bytesWriter.reset();
|
||||
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
|
||||
absolute = false;
|
||||
}
|
||||
} else {
|
||||
|
@ -771,13 +760,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// separate anymore:
|
||||
|
||||
// Write term meta data
|
||||
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
|
||||
for (int pos = 0; pos < longsSize; pos++) {
|
||||
assert longs[pos] >= 0;
|
||||
metaWriter.writeVLong(longs[pos]);
|
||||
}
|
||||
bytesWriter.copyTo(metaWriter);
|
||||
bytesWriter.reset();
|
||||
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
|
||||
absolute = false;
|
||||
} else {
|
||||
PendingBlock block = (PendingBlock) ent;
|
||||
|
@ -845,9 +828,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
this.fieldInfo = fieldInfo;
|
||||
assert fieldInfo.getIndexOptions() != IndexOptions.NONE;
|
||||
docsSeen = new FixedBitSet(maxDoc);
|
||||
|
||||
this.longsSize = postingsWriter.setField(fieldInfo);
|
||||
this.longs = new long[longsSize];
|
||||
postingsWriter.setField(fieldInfo);
|
||||
}
|
||||
|
||||
/** Writes one term's worth of postings. */
|
||||
|
@ -964,7 +945,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
sumTotalTermFreq,
|
||||
sumDocFreq,
|
||||
docsSeen.cardinality(),
|
||||
longsSize,
|
||||
minTerm, maxTerm));
|
||||
} else {
|
||||
assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS && sumTotalTermFreq == -1;
|
||||
|
@ -976,7 +956,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
private final ByteBuffersDataOutput suffixWriter = ByteBuffersDataOutput.newResettableInstance();
|
||||
private final ByteBuffersDataOutput statsWriter = ByteBuffersDataOutput.newResettableInstance();
|
||||
private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
|
||||
private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance();
|
||||
}
|
||||
|
||||
private boolean closed;
|
||||
|
@ -1009,7 +988,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
termsOut.writeVLong(field.sumDocFreq);
|
||||
termsOut.writeVInt(field.docCount);
|
||||
termsOut.writeVInt(field.longsSize);
|
||||
indexOut.writeVLong(field.indexStartFP);
|
||||
writeBytesRef(termsOut, field.minTerm);
|
||||
writeBytesRef(termsOut, field.maxTerm);
|
||||
|
|
|
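On the block-tree side, the term-writing loop above no longer copies a long[] and a scratch bytesWriter into metaWriter; encodeTerm appends to metaWriter directly. The following is a minimal round-trip sketch of that buffering, using made-up file-pointer values and plain VLong deltas in place of postingsWriter.encodeTerm/decodeTerm.

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;

// Round-trip sketch of the per-block metadata buffer; the file pointers are invented.
final class MetaBufferSketch {

  public static void main(String[] args) throws IOException {
    long[] docStartFPs = {100, 260, 400};

    // Write side: each term's delta now goes straight into the shared metadata buffer,
    // which is what encodeTerm does itself after this change.
    ByteBuffersDataOutput metaWriter = new ByteBuffersDataOutput();
    long last = 0;
    for (long fp : docStartFPs) {
      metaWriter.writeVLong(fp - last);
      last = fp;
    }

    // Read side: decodeTerm consumes the same stream in the same order.
    ByteArrayDataInput metaReader = new ByteArrayDataInput(metaWriter.toArrayCopy());
    long fp = 0;
    for (int i = 0; i < docStartFPs.length; i++) {
      fp += metaReader.readVLong();
      System.out.println("term " + i + " docStartFP=" + fp);
    }
  }
}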
@ -58,7 +58,6 @@ public final class FieldReader extends Terms implements Accountable {
|
|||
final BytesRef rootCode;
|
||||
final BytesRef minTerm;
|
||||
final BytesRef maxTerm;
|
||||
final int longsSize;
|
||||
final BlockTreeTermsReader parent;
|
||||
|
||||
final FST<BytesRef> index;
|
||||
|
@ -66,7 +65,7 @@ public final class FieldReader extends Terms implements Accountable {
|
|||
//private boolean DEBUG;
|
||||
|
||||
FieldReader(BlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
|
||||
long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm, boolean openedFromWriter, BlockTreeTermsReader.FSTLoadMode fstLoadMode) throws IOException {
|
||||
long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm, boolean openedFromWriter, BlockTreeTermsReader.FSTLoadMode fstLoadMode) throws IOException {
|
||||
assert numTerms > 0;
|
||||
this.fieldInfo = fieldInfo;
|
||||
//DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
|
||||
|
@ -77,7 +76,6 @@ public final class FieldReader extends Terms implements Accountable {
|
|||
this.docCount = docCount;
|
||||
this.indexStartFP = indexStartFP;
|
||||
this.rootCode = rootCode;
|
||||
this.longsSize = longsSize;
|
||||
this.minTerm = minTerm;
|
||||
this.maxTerm = maxTerm;
|
||||
// if (DEBUG) {
|
||||
|
|
|
@ -80,11 +80,8 @@ final class IntersectTermsEnumFrame {
|
|||
FST.Arc<BytesRef> arc;
|
||||
|
||||
final BlockTermState termState;
|
||||
|
||||
// metadata buffer, holding monotonic values
|
||||
final long[] longs;
|
||||
|
||||
// metadata buffer, holding general values
|
||||
// metadata buffer
|
||||
byte[] bytes = new byte[32];
|
||||
|
||||
final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
|
||||
|
@ -102,7 +99,6 @@ final class IntersectTermsEnumFrame {
|
|||
this.ord = ord;
|
||||
this.termState = ite.fr.parent.postingsReader.newTermState();
|
||||
this.termState.totalTermFreq = -1;
|
||||
this.longs = new long[ite.fr.longsSize];
|
||||
}
|
||||
|
||||
void loadNextFloorBlock() throws IOException {
|
||||
|
@ -278,11 +274,8 @@ final class IntersectTermsEnumFrame {
|
|||
} else {
|
||||
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
|
||||
}
|
||||
// metadata
|
||||
for (int i = 0; i < ite.fr.longsSize; i++) {
|
||||
longs[i] = bytesReader.readVLong();
|
||||
}
|
||||
ite.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ite.fr.fieldInfo, termState, absolute);
|
||||
// metadata
|
||||
ite.fr.parent.postingsReader.decodeTerm(bytesReader, ite.fr.fieldInfo, termState, absolute);
|
||||
|
||||
metaDataUpto++;
|
||||
absolute = false;
|
||||
|
|
|
@ -85,9 +85,7 @@ final class SegmentTermsEnumFrame {
|
|||
|
||||
final BlockTermState state;
|
||||
|
||||
// metadata buffer, holding monotonic values
|
||||
final long[] longs;
|
||||
// metadata buffer, holding general values
|
||||
// metadata buffer
|
||||
byte[] bytes = new byte[32];
|
||||
final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
|
||||
|
||||
|
@ -98,7 +96,6 @@ final class SegmentTermsEnumFrame {
|
|||
this.ord = ord;
|
||||
this.state = ste.fr.parent.postingsReader.newTermState();
|
||||
this.state.totalTermFreq = -1;
|
||||
this.longs = new long[ste.fr.longsSize];
|
||||
}
|
||||
|
||||
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
|
||||
|
@ -424,11 +421,8 @@ final class SegmentTermsEnumFrame {
|
|||
state.totalTermFreq = state.docFreq + statsReader.readVLong();
|
||||
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
|
||||
}
|
||||
// metadata
|
||||
for (int i = 0; i < ste.fr.longsSize; i++) {
|
||||
longs[i] = bytesReader.readVLong();
|
||||
}
|
||||
ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute);
|
||||
// metadata
|
||||
ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute);
|
||||
|
||||
metaDataUpto++;
|
||||
absolute = false;
|
||||
|
|
|
@ -166,7 +166,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
|
||||
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
|
||||
throws IOException {
|
||||
final IntBlockTermState termState = (IntBlockTermState) _termState;
|
||||
final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
|
@ -179,11 +179,11 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
|
|||
termState.payStartFP = 0;
|
||||
}
|
||||
|
||||
termState.docStartFP += longs[0];
|
||||
termState.docStartFP += in.readVLong();
|
||||
if (fieldHasPositions) {
|
||||
termState.posStartFP += longs[1];
|
||||
termState.posStartFP += in.readVLong();
|
||||
if (fieldHasOffsets || fieldHasPayloads) {
|
||||
termState.payStartFP += longs[2];
|
||||
termState.payStartFP += in.readVLong();
|
||||
}
|
||||
}
|
||||
if (termState.docFreq == 1) {
|
||||
|
|
|
@ -190,20 +190,11 @@ public final class Lucene84PostingsWriter extends PushPostingsWriterBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int setField(FieldInfo fieldInfo) {
|
||||
public void setField(FieldInfo fieldInfo) {
|
||||
super.setField(fieldInfo);
|
||||
skipWriter.setField(writePositions, writeOffsets, writePayloads);
|
||||
lastState = emptyState;
|
||||
fieldHasNorms = fieldInfo.hasNorms();
|
||||
if (writePositions) {
|
||||
if (writePayloads || writeOffsets) {
|
||||
return 3; // doc + pos + pay FP
|
||||
} else {
|
||||
return 2; // doc + pos FP
|
||||
}
|
||||
} else {
|
||||
return 1; // doc FP
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -466,16 +457,16 @@ public final class Lucene84PostingsWriter extends PushPostingsWriterBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
|
||||
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
|
||||
IntBlockTermState state = (IntBlockTermState)_state;
|
||||
if (absolute) {
|
||||
lastState = emptyState;
|
||||
}
|
||||
longs[0] = state.docStartFP - lastState.docStartFP;
|
||||
out.writeVLong(state.docStartFP - lastState.docStartFP);
|
||||
if (writePositions) {
|
||||
longs[1] = state.posStartFP - lastState.posStartFP;
|
||||
out.writeVLong(state.posStartFP - lastState.posStartFP);
|
||||
if (writePayloads || writeOffsets) {
|
||||
longs[2] = state.payStartFP - lastState.payStartFP;
|
||||
out.writeVLong(state.payStartFP - lastState.payStartFP);
|
||||
}
|
||||
}
|
||||
if (state.singletonDocID != -1) {
|
||||
|
|
|
@ -50,7 +50,7 @@ final class IDVersionPostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
|
||||
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
|
||||
throws IOException {
|
||||
final IDVersionTermState termState = (IDVersionTermState) _termState;
|
||||
termState.docID = in.readVInt();
|
||||
|
|
|
@ -46,7 +46,6 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
private long lastVersion;
|
||||
|
||||
private final Bits liveDocs;
|
||||
private String segment;
|
||||
|
||||
public IDVersionPostingsWriter(Bits liveDocs) {
|
||||
this.liveDocs = liveDocs;
|
||||
|
@ -60,11 +59,10 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
@Override
|
||||
public void init(IndexOutput termsOut, SegmentWriteState state) throws IOException {
|
||||
CodecUtil.writeIndexHeader(termsOut, TERMS_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
|
||||
segment = state.segmentInfo.name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int setField(FieldInfo fieldInfo) {
|
||||
public void setField(FieldInfo fieldInfo) {
|
||||
super.setField(fieldInfo);
|
||||
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
throw new IllegalArgumentException("field must be index using IndexOptions.DOCS_AND_FREQS_AND_POSITIONS");
|
||||
|
@ -75,7 +73,6 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
throw new IllegalArgumentException("field cannot index term vectors: CheckIndex will report this as index corruption");
|
||||
}
|
||||
lastState = emptyState;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -154,7 +151,7 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
|
|||
private long lastEncodedVersion;
|
||||
|
||||
@Override
|
||||
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
|
||||
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
|
||||
IDVersionTermState state = (IDVersionTermState) _state;
|
||||
out.writeVInt(state.docID);
|
||||
if (absolute) {
|
||||
|
|
|
@ -83,9 +83,7 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
|
||||
final BlockTermState state;
|
||||
|
||||
// metadata buffer, holding monotonic values
|
||||
public long[] longs;
|
||||
// metadata buffer, holding general values
|
||||
// metadata
|
||||
public byte[] bytes;
|
||||
ByteArrayDataInput bytesReader;
|
||||
|
||||
|
@ -96,7 +94,6 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
this.ord = ord;
|
||||
this.state = ste.fr.parent.postingsReader.newTermState();
|
||||
this.state.totalTermFreq = -1;
|
||||
this.longs = new long[ste.fr.longsSize];
|
||||
}
|
||||
|
||||
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
|
||||
|
@ -396,11 +393,8 @@ final class IDVersionSegmentTermsEnumFrame {
|
|||
state.docFreq = 1;
|
||||
state.totalTermFreq = 1;
|
||||
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
|
||||
// metadata
|
||||
for (int i = 0; i < ste.fr.longsSize; i++) {
|
||||
longs[i] = bytesReader.readVLong();
|
||||
}
|
||||
ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute);
|
||||
// metadata
|
||||
ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute);
|
||||
|
||||
metaDataUpto++;
|
||||
absolute = false;
|
||||
|
|
|
@ -127,7 +127,6 @@ public final class VersionBlockTreeTermsReader extends FieldsProducer {
|
|||
final long sumDocFreq = numTerms;
|
||||
assert numTerms <= Integer.MAX_VALUE;
|
||||
final int docCount = (int) numTerms;
|
||||
final int longsSize = in.readVInt();
|
||||
|
||||
BytesRef minTerm = readBytesRef(in);
|
||||
BytesRef maxTerm = readBytesRef(in);
|
||||
|
@ -143,7 +142,7 @@ public final class VersionBlockTreeTermsReader extends FieldsProducer {
|
|||
final long indexStartFP = indexIn.readVLong();
|
||||
VersionFieldReader previous = fields.put(fieldInfo.name,
|
||||
new VersionFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
|
||||
indexStartFP, longsSize, indexIn, minTerm, maxTerm));
|
||||
indexStartFP, indexIn, minTerm, maxTerm));
|
||||
if (previous != null) {
|
||||
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
|
||||
}
|
||||
|
|
|
@ -143,11 +143,10 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
public final Pair<BytesRef,Long> rootCode;
|
||||
public final long numTerms;
|
||||
public final long indexStartFP;
|
||||
private final int longsSize;
|
||||
public final BytesRef minTerm;
|
||||
public final BytesRef maxTerm;
|
||||
|
||||
public FieldMetaData(FieldInfo fieldInfo, Pair<BytesRef,Long> rootCode, long numTerms, long indexStartFP, int longsSize,
|
||||
public FieldMetaData(FieldInfo fieldInfo, Pair<BytesRef,Long> rootCode, long numTerms, long indexStartFP,
|
||||
BytesRef minTerm, BytesRef maxTerm) {
|
||||
assert numTerms > 0;
|
||||
this.fieldInfo = fieldInfo;
|
||||
|
@ -155,7 +154,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
this.rootCode = rootCode;
|
||||
this.indexStartFP = indexStartFP;
|
||||
this.numTerms = numTerms;
|
||||
this.longsSize = longsSize;
|
||||
this.minTerm = minTerm;
|
||||
this.maxTerm = maxTerm;
|
||||
}
|
||||
|
@ -403,7 +401,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
class TermsWriter {
|
||||
private final FieldInfo fieldInfo;
|
||||
private final int longsSize;
|
||||
private long numTerms;
|
||||
final FixedBitSet docsSeen;
|
||||
long indexStartFP;
|
||||
|
@ -416,8 +413,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
|
|||
private final BytesRefBuilder lastTerm = new BytesRefBuilder();
|
||||
private int[] prefixStarts = new int[8];
|
||||
|
||||
private final long[] longs;
|
||||
|
||||
// Pending stack of terms and blocks. As terms arrive (in sorted order)
|
||||
// we append to this stack, and once the top of the stack has enough
|
||||
// terms starting with a common prefix, we write a new block with
|
||||
|
@@ -605,13 +600,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
 assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel;
 
 // Write term meta data
-postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
-for (int pos = 0; pos < longsSize; pos++) {
-assert longs[pos] >= 0;
-metaWriter.writeVLong(longs[pos]);
-}
-bytesWriter.copyTo(metaWriter);
-bytesWriter.reset();
+postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
 absolute = false;
 }
 } else {
@@ -648,13 +637,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
 // separate anymore:
 
 // Write term meta data
-postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
-for (int pos = 0; pos < longsSize; pos++) {
-assert longs[pos] >= 0;
-metaWriter.writeVLong(longs[pos]);
-}
-bytesWriter.copyTo(metaWriter);
-bytesWriter.reset();
+postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
 absolute = false;
 } else {
 PendingBlock block = (PendingBlock) ent;
@@ -720,8 +703,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
 this.fieldInfo = fieldInfo;
 docsSeen = new FixedBitSet(maxDoc);
 
-this.longsSize = postingsWriter.setField(fieldInfo);
-this.longs = new long[longsSize];
+postingsWriter.setField(fieldInfo);
 }
 
 /** Writes one term's worth of postings. */
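The two encodeTerm hunks above and this constructor change are the writer-side mirror of the same simplification: setField no longer has to report how many metadata longs the format will produce, and encodeTerm writes its delta-coded file pointers straight into metaWriter instead of filling a long[] that the terms writer then copied into the metadata stream by hand (the bytesWriter indirection removed further down goes with it). A rough, self-contained sketch of that encode path, again over plain java.io streams and with made-up names (TermMetaEncoder is not a real Lucene class):

    import java.io.DataOutputStream;
    import java.io.IOException;

    final class TermMetaEncoder {
      // Last values written, so each term stores only a small non-negative delta.
      private long lastDocStartFP;
      private long lastPosStartFP;

      // Writes a non-negative long in the usual VLong form (7 bits per byte).
      static void writeVLong(DataOutputStream out, long v) throws IOException {
        while ((v & ~0x7FL) != 0) {
          out.writeByte((int) ((v & 0x7F) | 0x80));
          v >>>= 7;
        }
        out.writeByte((int) v);
      }

      // Encodes one term's metadata directly to the output; the caller never sees
      // how many values were written, which is exactly what dropping long[] buys.
      void encodeTerm(DataOutputStream out, long docStartFP, long posStartFP,
                      boolean hasPositions, boolean absolute) throws IOException {
        if (absolute) {
          lastDocStartFP = 0;
          lastPosStartFP = 0;
        }
        // File pointers only grow, so the deltas below are non-negative.
        writeVLong(out, docStartFP - lastDocStartFP);
        if (hasPositions) {
          writeVLong(out, posStartFP - lastPosStartFP);
        }
        lastDocStartFP = docStartFP;
        lastPosStartFP = posStartFP;
      }
    }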
@@ -818,7 +800,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
 ((PendingBlock) pending.get(0)).index.getEmptyOutput(),
 numTerms,
 indexStartFP,
-longsSize,
 minTerm, maxTerm));
 } else {
 // cannot assert this: we skip deleted docIDs in the postings:
@@ -828,7 +809,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
 
 private final ByteBuffersDataOutput suffixWriter = ByteBuffersDataOutput.newResettableInstance();
 private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
-private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance();
 }
 
 private boolean closed;
@@ -856,7 +836,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
 out.writeVInt(field.rootCode.output1.length);
 out.writeBytes(field.rootCode.output1.bytes, field.rootCode.output1.offset, field.rootCode.output1.length);
 out.writeVLong(field.rootCode.output2);
-out.writeVInt(field.longsSize);
 indexOut.writeVLong(field.indexStartFP);
 writeBytesRef(out, field.minTerm);
 writeBytesRef(out, field.maxTerm);
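Dropping out.writeVInt(field.longsSize) here changes the on-disk per-field header of this terms dictionary; the matching in.readVInt() is removed in the same commit (see the VersionBlockTreeTermsReader hunk near the top of this diff), so writer and reader stay in lockstep. A toy round trip of such a header, sketched with java.io streams and invented field names purely to show that both sides must drop the value together:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    final class FieldHeaderCodec {
      // Illustrative subset of a per-field header; the real one also stores rootCode, min/max term, etc.
      static void write(DataOutputStream out, long numTerms, int docCount, long indexStartFP) throws IOException {
        out.writeLong(numTerms);
        // out.writeInt(longsSize);   // removed field: the reader below must stop expecting it too
        out.writeInt(docCount);
        out.writeLong(indexStartFP);
      }

      static long[] read(DataInputStream in) throws IOException {
        long numTerms = in.readLong();
        int docCount = in.readInt();
        long indexStartFP = in.readLong();
        return new long[] {numTerms, docCount, indexStartFP};
      }

      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        write(new DataOutputStream(bytes), 42L, 7, 1024L);
        long[] header = read(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(header[0] + " terms, " + header[1] + " docs, index FP " + header[2]);
      }
    }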
@@ -45,14 +45,13 @@ final class VersionFieldReader extends Terms implements Accountable {
 final Pair<BytesRef,Long> rootCode;
 final BytesRef minTerm;
 final BytesRef maxTerm;
-final int longsSize;
 final VersionBlockTreeTermsReader parent;
 
 final FST<Pair<BytesRef,Long>> index;
 //private boolean DEBUG;
 
 VersionFieldReader(VersionBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, Pair<BytesRef,Long> rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
-long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
+long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
 assert numTerms > 0;
 this.fieldInfo = fieldInfo;
 //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
@@ -63,7 +62,6 @@ final class VersionFieldReader extends Terms implements Accountable {
 this.docCount = docCount;
 this.indexStartFP = indexStartFP;
 this.rootCode = rootCode;
-this.longsSize = longsSize;
 this.minTerm = minTerm;
 this.maxTerm = maxTerm;
 // if (DEBUG) {
@@ -41,8 +41,6 @@ import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsReader;
 import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsWriter;
 import org.apache.lucene.codecs.lucene84.Lucene84PostingsReader;
 import org.apache.lucene.codecs.lucene84.Lucene84PostingsWriter;
-import org.apache.lucene.codecs.memory.FSTOrdTermsReader;
-import org.apache.lucene.codecs.memory.FSTOrdTermsWriter;
 import org.apache.lucene.codecs.memory.FSTTermsReader;
 import org.apache.lucene.codecs.memory.FSTTermsWriter;
 import org.apache.lucene.index.FieldInfo;
@@ -122,7 +120,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
 PostingsWriterBase postingsWriter = new Lucene84PostingsWriter(state);
 
 final FieldsConsumer fields;
-final int t1 = random.nextInt(5);
+final int t1 = random.nextInt(4);
 
 if (t1 == 0) {
 boolean success = false;
@@ -135,16 +133,6 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
 }
 }
 } else if (t1 == 1) {
-boolean success = false;
-try {
-fields = new FSTOrdTermsWriter(state, postingsWriter);
-success = true;
-} finally {
-if (!success) {
-postingsWriter.close();
-}
-}
-} else if (t1 == 2) {
 // Use BlockTree terms dict
 
 if (LuceneTestCase.VERBOSE) {
@@ -165,7 +153,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
 postingsWriter.close();
 }
 }
-} else if (t1 == 3) {
+} else if (t1 == 2) {
 
 if (LuceneTestCase.VERBOSE) {
 System.out.println("MockRandomCodec: writing Block terms dict");
@@ -235,7 +223,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
 }
 }
 }
-} else if (t1 == 4) {
+} else if (t1 == 3) {
 // Use OrdsBlockTree terms dict
 if (LuceneTestCase.VERBOSE) {
 System.out.println("MockRandomCodec: writing OrdsBlockTree");
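In MockRandomPostingsFormat the removal of FSTOrd plays out as bookkeeping: with that terms dictionary gone there are only four writer (and, below, four reader) choices left, so the bound passed to random.nextInt drops from 5 to 4 and the later else-if branches are renumbered to stay contiguous. A tiny hedged sketch of that pattern with generic names (the array contents are illustrative, not the test's exact choices):

    import java.util.Random;

    final class RandomImplPicker {
      // One entry per surviving implementation; keep nextInt's bound tied to this length.
      private static final String[] CHOICES = {"FST", "BlockTree", "Block", "OrdsBlockTree"};

      static String pick(Random random) {
        int t1 = random.nextInt(CHOICES.length);  // was a bound of 5 while a fifth choice existed
        return CHOICES[t1];
      }

      public static void main(String[] args) {
        System.out.println(pick(new Random(42)));
      }
    }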
@@ -287,7 +275,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
 PostingsReaderBase postingsReader = new Lucene84PostingsReader(state);
 
 final FieldsProducer fields;
-final int t1 = random.nextInt(5);
+final int t1 = random.nextInt(4);
 if (t1 == 0) {
 boolean success = false;
 try {
@@ -299,16 +287,6 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
 }
 }
 } else if (t1 == 1) {
-boolean success = false;
-try {
-fields = new FSTOrdTermsReader(state, postingsReader);
-success = true;
-} finally {
-if (!success) {
-postingsReader.close();
-}
-}
-} else if (t1 == 2) {
 // Use BlockTree terms dict
 if (LuceneTestCase.VERBOSE) {
 System.out.println("MockRandomCodec: reading BlockTree terms dict");
@@ -323,7 +301,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
 postingsReader.close();
 }
 }
-} else if (t1 == 3) {
+} else if (t1 == 2) {
 
 if (LuceneTestCase.VERBOSE) {
 System.out.println("MockRandomCodec: reading Block terms dict");
@@ -374,7 +352,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
 }
 }
 }
-} else if (t1 == 4) {
+} else if (t1 == 3) {
 // Use OrdsBlockTree terms dict
 if (LuceneTestCase.VERBOSE) {
 System.out.println("MockRandomCodec: reading OrdsBlockTree terms dict");
@@ -45,7 +45,6 @@ import org.apache.lucene.codecs.bloom.TestBloomFilteredLucenePostings;
 import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
 import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
 import org.apache.lucene.codecs.memory.DirectPostingsFormat;
-import org.apache.lucene.codecs.memory.FSTOrdPostingsFormat;
 import org.apache.lucene.codecs.memory.FSTPostingsFormat;
 import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
@@ -190,7 +189,6 @@ public class RandomCodec extends AssertingCodec {
 add(avoidCodecs,
 TestUtil.getDefaultPostingsFormat(minItemsPerBlock, maxItemsPerBlock, RandomPicks.randomFrom(random, BlockTreeTermsReader.FSTLoadMode.values())),
 new FSTPostingsFormat(),
-new FSTOrdPostingsFormat(),
 new DirectPostingsFormat(LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : maxItemsPerBlock),
 LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : lowFreqCutoff)),
 //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucenePostings to be constructed