LUCENE-9116: Remove long[] from `PostingsWriterBase#encodeTerm`. (#1149) (#1158)

All the metadata can be directly encoded in the `DataOutput`.
Adrien Grand 2020-01-17 13:39:45 +01:00 committed by GitHub
parent 5f2d7c4855
commit fb3ca8d000
39 changed files with 128 additions and 1780 deletions
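As an illustration of the change (not part of this commit): `encodeTerm` now writes term metadata such as file-pointer deltas straight to the `DataOutput` as vlongs, and `decodeTerm` reads them back from the `DataInput`, instead of passing them through a `long[]` whose length `setField` used to report. Below is a minimal sketch of that round trip; it only assumes `ByteBuffersDataOutput` and `ByteArrayDataInput` from lucene-core, and the class name and sample file-pointer values are hypothetical.

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;

/** Illustrative sketch: term metadata written directly as vlongs, no long[] buffer. */
public class EncodeTermSketch {
  public static void main(String[] args) throws Exception {
    long docStartFP = 1024, posStartFP = 2048;        // this term's file pointers
    long lastDocStartFP = 512, lastPosStartFP = 1536; // previous term's file pointers

    // Writer side (what encodeTerm now does): deltas go straight to the DataOutput.
    ByteBuffersDataOutput out = new ByteBuffersDataOutput();
    out.writeVLong(docStartFP - lastDocStartFP);
    out.writeVLong(posStartFP - lastPosStartFP);

    // Reader side (what decodeTerm now does): deltas come straight from the DataInput.
    ByteArrayDataInput in = new ByteArrayDataInput(out.toArrayCopy());
    long decodedDocStartFP = lastDocStartFP + in.readVLong();
    long decodedPosStartFP = lastPosStartFP + in.readVLong();

    System.out.println(decodedDocStartFP + " " + decodedPosStartFP); // prints "1024 2048"
  }
}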

View File

@ -91,6 +91,12 @@ API Changes
yield Passages sized a little different due to the fact that the sizing pivot is now the center of the first match and
not its left edge.
* LUCENE-9116: PostingsWriterBase and PostingsReaderBase no longer support
setting a field's metadata via a `long[]`. (Adrien Grand)
* LUCENE-9116: The FSTOrd postings format has been removed.
(Adrien Grand)
* LUCENE-8369: Remove obsolete spatial module. (Nick Knize, David Smiley)
New Features

View File

@ -154,7 +154,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
}
@Override
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException {
final IntBlockTermState termState = (IntBlockTermState) _termState;
final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
@ -167,11 +167,11 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
termState.payStartFP = 0;
}
termState.docStartFP += longs[0];
termState.docStartFP += in.readVLong();
if (fieldHasPositions) {
termState.posStartFP += longs[1];
termState.posStartFP += in.readVLong();
if (fieldHasOffsets || fieldHasPayloads) {
termState.payStartFP += longs[2];
termState.payStartFP += in.readVLong();
}
}
if (termState.docFreq == 1) {

View File

@ -187,20 +187,11 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
}
@Override
public int setField(FieldInfo fieldInfo) {
public void setField(FieldInfo fieldInfo) {
super.setField(fieldInfo);
skipWriter.setField(writePositions, writeOffsets, writePayloads);
lastState = emptyState;
fieldHasNorms = fieldInfo.hasNorms();
if (writePositions) {
if (writePayloads || writeOffsets) {
return 3; // doc + pos + pay FP
} else {
return 2; // doc + pos FP
}
} else {
return 1; // doc FP
}
}
@Override
@ -463,16 +454,16 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
}
@Override
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
IntBlockTermState state = (IntBlockTermState)_state;
if (absolute) {
lastState = emptyState;
}
longs[0] = state.docStartFP - lastState.docStartFP;
out.writeVLong(state.docStartFP - lastState.docStartFP);
if (writePositions) {
longs[1] = state.posStartFP - lastState.posStartFP;
out.writeVLong(state.posStartFP - lastState.posStartFP);
if (writePayloads || writeOffsets) {
longs[2] = state.payStartFP - lastState.payStartFP;
out.writeVLong(state.payStartFP - lastState.payStartFP);
}
}
if (state.singletonDocID != -1) {
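The `absolute` flag keeps its old meaning: at the start of a metadata block the writer resets `lastState`, so the first term encodes absolute file pointers and the following terms encode deltas against the previous term; the reader accumulates the vlongs back. A hedged, standalone sketch of that chain (class name and sample pointers are hypothetical; only lucene-core's `ByteBuffersDataOutput`/`ByteArrayDataInput` are used):

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;

/** Sketch of the absolute/delta chain behind encodeTerm/decodeTerm. */
public class DeltaChainSketch {
  public static void main(String[] args) throws Exception {
    long[] docStartFPs = {100, 250, 900}; // example per-term file pointers

    // Write: the first term of the block is absolute, the rest are deltas.
    ByteBuffersDataOutput out = new ByteBuffersDataOutput();
    long last = 0;            // plays the role of lastState/emptyState
    boolean absolute = true;
    for (long fp : docStartFPs) {
      if (absolute) {
        last = 0;             // reset, like lastState = emptyState
      }
      out.writeVLong(fp - last);
      last = fp;
      absolute = false;
    }

    // Read: accumulate deltas back into absolute file pointers.
    ByteArrayDataInput in = new ByteArrayDataInput(out.toArrayCopy());
    long fp = 0;
    for (int i = 0; i < docStartFPs.length; i++) {
      fp += in.readVLong();
      System.out.println(fp); // prints 100, 250, 900
    }
  }
}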

View File

@ -145,7 +145,6 @@ public class BlockTermsReader extends FieldsProducer {
// when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
final int docCount = in.readVInt();
final int longsSize = in.readVInt();
if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in);
}
@ -155,7 +154,7 @@ public class BlockTermsReader extends FieldsProducer {
if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
}
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
if (previous != null) {
throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in);
}
@ -223,9 +222,8 @@ public class BlockTermsReader extends FieldsProducer {
final long sumTotalTermFreq;
final long sumDocFreq;
final int docCount;
final int longsSize;
FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
@ -233,7 +231,6 @@ public class BlockTermsReader extends FieldsProducer {
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
}
@Override
@ -326,7 +323,6 @@ public class BlockTermsReader extends FieldsProducer {
private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
private int metaDataUpto;
private long[] longs;
private byte[] bytes;
private ByteArrayDataInput bytesReader;
@ -343,7 +339,6 @@ public class BlockTermsReader extends FieldsProducer {
termSuffixes = new byte[128];
docFreqBytes = new byte[64];
//System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
longs = new long[longsSize];
}
// TODO: we may want an alternate mode here which is
@ -826,10 +821,7 @@ public class BlockTermsReader extends FieldsProducer {
//System.out.println(" totTF=" + state.totalTermFreq);
}
// metadata
for (int i = 0; i < longs.length; i++) {
longs[i] = bytesReader.readVLong();
}
postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
postingsReader.decodeTerm(bytesReader, fieldInfo, state, absolute);
metaDataUpto++;
absolute = false;
}

View File

@ -81,9 +81,8 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
public final int longsSize;
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.termsStartPointer = termsStartPointer;
@ -91,7 +90,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
}
}
@ -176,7 +174,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
out.writeVInt(field.longsSize);
}
writeTrailer(dirStart);
CodecUtil.writeFooter(out);
@ -206,7 +203,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
long sumTotalTermFreq;
long sumDocFreq;
int docCount;
int longsSize;
private TermEntry[] pendingTerms;
@ -226,7 +222,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
}
termsStartPointer = out.getFilePointer();
this.postingsWriter = postingsWriter;
this.longsSize = postingsWriter.setField(fieldInfo);
postingsWriter.setField(fieldInfo);
}
private final BytesRefBuilder lastPrevTerm = new BytesRefBuilder();
@ -285,8 +281,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
termsStartPointer,
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 ? sumTotalTermFreq : -1,
sumDocFreq,
docsSeen.cardinality(),
longsSize));
docsSeen.cardinality()));
}
}
@ -307,7 +302,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
}
private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput bufferWriter = ByteBuffersDataOutput.newResettableInstance();
private void flushBlock() throws IOException {
//System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
@ -353,16 +347,10 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable {
bytesWriter.reset();
// 4th pass: write the metadata
long[] longs = new long[longsSize];
boolean absolute = true;
for(int termCount=0;termCount<pendingCount;termCount++) {
final BlockTermState state = pendingTerms[termCount].state;
postingsWriter.encodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
for (int i = 0; i < longsSize; i++) {
bytesWriter.writeVLong(longs[i]);
}
bufferWriter.copyTo(bytesWriter);
bufferWriter.reset();
postingsWriter.encodeTerm(bytesWriter, fieldInfo, state, absolute);
absolute = false;
}
out.writeVInt(Math.toIntExact(bytesWriter.size()));

View File

@ -130,7 +130,6 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
// when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
final int docCount = in.readVInt();
final int longsSize = in.readVInt();
// System.out.println(" longsSize=" + longsSize);
BytesRef minTerm = readBytesRef(in);
@ -147,7 +146,7 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
final long indexStartFP = indexIn.readVLong();
OrdsFieldReader previous = fields.put(fieldInfo.name,
new OrdsFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
indexStartFP, longsSize, indexIn, minTerm, maxTerm));
indexStartFP, indexIn, minTerm, maxTerm));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
}

View File

@ -143,12 +143,11 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
private final int longsSize;
public final BytesRef minTerm;
public final BytesRef maxTerm;
public FieldMetaData(FieldInfo fieldInfo, Output rootCode, long numTerms, long indexStartFP,
long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize,
long sumTotalTermFreq, long sumDocFreq, int docCount,
BytesRef minTerm, BytesRef maxTerm) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
@ -159,7 +158,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
}
@ -424,7 +422,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
class TermsWriter {
private final FieldInfo fieldInfo;
private final int longsSize;
private long numTerms;
final FixedBitSet docsSeen;
long sumTotalTermFreq;
@ -439,8 +436,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
private final BytesRefBuilder lastTerm = new BytesRefBuilder();
private int[] prefixStarts = new int[8];
private final long[] longs;
// Pending stack of terms and blocks. As terms arrive (in sorted order)
// we append to this stack, and once the top of the stack has enough
// terms starting with a common prefix, we write a new block with
@ -633,13 +628,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
}
// Write term meta data
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
assert longs[pos] >= 0;
metaWriter.writeVLong(longs[pos]);
}
bytesWriter.copyTo(metaWriter);
bytesWriter.reset();
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;
}
totalTermCount = end-start;
@ -684,13 +673,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
// separate anymore:
// Write term meta data
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
assert longs[pos] >= 0;
metaWriter.writeVLong(longs[pos]);
}
bytesWriter.copyTo(metaWriter);
bytesWriter.reset();
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;
totalTermCount++;
@ -763,8 +746,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
TermsWriter(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
docsSeen = new FixedBitSet(maxDoc);
this.longsSize = postingsWriter.setField(fieldInfo);
this.longs = new long[longsSize];
postingsWriter.setField(fieldInfo);
}
/** Writes one term's worth of postings. */
@ -874,7 +856,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
sumTotalTermFreq,
sumDocFreq,
docsSeen.cardinality(),
longsSize,
minTerm, maxTerm));
} else {
assert docsSeen.cardinality() == 0;
@ -884,7 +865,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
private final ByteBuffersDataOutput suffixWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput statsWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance();
}
private boolean closed;
@ -916,7 +896,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
out.writeVInt(field.longsSize);
indexOut.writeVLong(field.indexStartFP);
writeBytesRef(out, field.minTerm);
writeBytesRef(out, field.maxTerm);

View File

@ -46,7 +46,6 @@ final class OrdsFieldReader extends Terms implements Accountable {
final Output rootCode;
final BytesRef minTerm;
final BytesRef maxTerm;
final int longsSize;
final OrdsBlockTreeTermsReader parent;
final FST<Output> index;
@ -54,7 +53,7 @@ final class OrdsFieldReader extends Terms implements Accountable {
OrdsFieldReader(OrdsBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms,
Output rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
//DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
@ -65,7 +64,6 @@ final class OrdsFieldReader extends Terms implements Accountable {
this.docCount = docCount;
this.indexStartFP = indexStartFP;
this.rootCode = rootCode;
this.longsSize = longsSize;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
// if (DEBUG) {

View File

@ -84,9 +84,7 @@ final class OrdsIntersectTermsEnumFrame {
final BlockTermState termState;
// metadata buffer, holding monotonic values
public long[] longs;
// metadata buffer, holding general values
// metadata
public byte[] bytes;
ByteArrayDataInput bytesReader;
@ -103,7 +101,6 @@ final class OrdsIntersectTermsEnumFrame {
this.ord = ord;
this.termState = ite.fr.parent.postingsReader.newTermState();
this.termState.totalTermFreq = -1;
this.longs = new long[ite.fr.longsSize];
}
void loadNextFloorBlock() throws IOException {
@ -298,11 +295,8 @@ final class OrdsIntersectTermsEnumFrame {
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}
// metadata
for (int i = 0; i < ite.fr.longsSize; i++) {
longs[i] = bytesReader.readVLong();
}
ite.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ite.fr.fieldInfo, termState, absolute);
// metadata
ite.fr.parent.postingsReader.decodeTerm(bytesReader, ite.fr.fieldInfo, termState, absolute);
metaDataUpto++;
absolute = false;

View File

@ -97,9 +97,7 @@ final class OrdsSegmentTermsEnumFrame {
final BlockTermState state;
// metadata buffer, holding monotonic values
public long[] longs;
// metadata buffer, holding general values
// metadata
public byte[] bytes;
ByteArrayDataInput bytesReader;
@ -110,7 +108,6 @@ final class OrdsSegmentTermsEnumFrame {
this.ord = ord;
this.state = ste.fr.parent.postingsReader.newTermState();
this.state.totalTermFreq = -1;
this.longs = new long[ste.fr.longsSize];
}
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
@ -507,11 +504,8 @@ final class OrdsSegmentTermsEnumFrame {
}
//if (DEBUG) System.out.println(" longsSize=" + ste.fr.longsSize);
// metadata
for (int i = 0; i < ste.fr.longsSize; i++) {
longs[i] = bytesReader.readVLong();
}
ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute);
// metadata
ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute);
metaDataUpto++;
absolute = false;

View File

@ -1,78 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsReader;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;
/**
* FSTOrd term dict + Lucene50PBF
*/
public final class FSTOrdPostingsFormat extends PostingsFormat {
public FSTOrdPostingsFormat() {
super("FSTOrd50");
}
@Override
public String toString() {
return getName();
}
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase postingsWriter = new Lucene84PostingsWriter(state);
boolean success = false;
try {
FieldsConsumer ret = new FSTOrdTermsWriter(state, postingsWriter);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(postingsWriter);
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postingsReader = new Lucene84PostingsReader(state);
boolean success = false;
try {
FieldsProducer ret = new FSTOrdTermsReader(state, postingsReader);
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(postingsReader);
}
}
}
}

View File

@ -1,884 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
/**
* FST-based terms dictionary reader.
*
* The FST index maps each term and its ord, and during seek
* the ord is used to fetch metadata from a single block.
* The term dictionary is fully memory resident.
*
* @lucene.experimental
*/
public class FSTOrdTermsReader extends FieldsProducer {
static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL;
final TreeMap<String, TermsReader> fields = new TreeMap<>();
final PostingsReaderBase postingsReader;
//static final boolean TEST = false;
public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);
this.postingsReader = postingsReader;
ChecksumIndexInput indexIn = null;
IndexInput blockIn = null;
boolean success = false;
try {
indexIn = state.directory.openChecksumInput(termsIndexFileName, state.context);
blockIn = state.directory.openInput(termsBlockFileName, state.context);
int version = CodecUtil.checkIndexHeader(indexIn, FSTOrdTermsWriter.TERMS_INDEX_CODEC_NAME,
FSTOrdTermsWriter.VERSION_START,
FSTOrdTermsWriter.VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
int version2 = CodecUtil.checkIndexHeader(blockIn, FSTOrdTermsWriter.TERMS_CODEC_NAME,
FSTOrdTermsWriter.VERSION_START,
FSTOrdTermsWriter.VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch: index=" + version + ", terms=" + version2, blockIn);
}
CodecUtil.checksumEntireFile(blockIn);
this.postingsReader.init(blockIn, state);
seekDir(blockIn);
final FieldInfos fieldInfos = state.fieldInfos;
final int numFields = blockIn.readVInt();
for (int i = 0; i < numFields; i++) {
FieldInfo fieldInfo = fieldInfos.fieldInfo(blockIn.readVInt());
boolean hasFreq = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
long numTerms = blockIn.readVLong();
long sumTotalTermFreq = blockIn.readVLong();
// if freqs are omitted, sumDocFreq=sumTotalTermFreq and we only write one value
long sumDocFreq = hasFreq ? blockIn.readVLong() : sumTotalTermFreq;
int docCount = blockIn.readVInt();
int longsSize = blockIn.readVInt();
FST<Long> index = new FST<>(indexIn, PositiveIntOutputs.getSingleton());
TermsReader current = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);
TermsReader previous = fields.put(fieldInfo.name, current);
checkFieldSummary(state.segmentInfo, indexIn, blockIn, current, previous);
}
CodecUtil.checkFooter(indexIn);
success = true;
} finally {
if (success) {
IOUtils.close(indexIn, blockIn);
} else {
IOUtils.closeWhileHandlingException(indexIn, blockIn);
}
}
}
private void seekDir(IndexInput in) throws IOException {
in.seek(in.length() - CodecUtil.footerLength() - 8);
in.seek(in.readLong());
}
private void checkFieldSummary(SegmentInfo info, IndexInput indexIn, IndexInput blockIn, TermsReader field, TermsReader previous) throws IOException {
// #docs with field must be <= #docs
if (field.docCount < 0 || field.docCount > info.maxDoc()) {
throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.maxDoc() + " (blockIn=" + blockIn + ")", indexIn);
}
// #postings must be >= #docs with field
if (field.sumDocFreq < field.docCount) {
throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (blockIn=" + blockIn + ")", indexIn);
}
// #positions must be >= #postings
if (field.sumTotalTermFreq < field.sumDocFreq) {
throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (blockIn=" + blockIn + ")", indexIn);
}
if (previous != null) {
throw new CorruptIndexException("duplicate fields: " + field.fieldInfo.name + " (blockIn=" + blockIn + ")", indexIn);
}
}
@Override
public Iterator<String> iterator() {
return Collections.unmodifiableSet(fields.keySet()).iterator();
}
@Override
public Terms terms(String field) throws IOException {
assert field != null;
return fields.get(field);
}
@Override
public int size() {
return fields.size();
}
@Override
public void close() throws IOException {
try {
IOUtils.close(postingsReader);
} finally {
fields.clear();
}
}
final class TermsReader extends Terms implements Accountable {
final FieldInfo fieldInfo;
final long numTerms;
final long sumTotalTermFreq;
final long sumDocFreq;
final int docCount;
final int longsSize;
final FST<Long> index;
final int numSkipInfo;
final long[] skipInfo;
final byte[] statsBlock;
final byte[] metaLongsBlock;
final byte[] metaBytesBlock;
TermsReader(FieldInfo fieldInfo, IndexInput blockIn, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<Long> index) throws IOException {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
this.index = index;
assert (numTerms & (~0xffffffffL)) == 0;
final int numBlocks = (int)(numTerms + INTERVAL - 1) / INTERVAL;
this.numSkipInfo = longsSize + 3;
this.skipInfo = new long[numBlocks * numSkipInfo];
this.statsBlock = new byte[(int)blockIn.readVLong()];
this.metaLongsBlock = new byte[(int)blockIn.readVLong()];
this.metaBytesBlock = new byte[(int)blockIn.readVLong()];
int last = 0, next = 0;
for (int i = 1; i < numBlocks; i++) {
next = numSkipInfo * i;
for (int j = 0; j < numSkipInfo; j++) {
skipInfo[next + j] = skipInfo[last + j] + blockIn.readVLong();
}
last = next;
}
blockIn.readBytes(statsBlock, 0, statsBlock.length);
blockIn.readBytes(metaLongsBlock, 0, metaLongsBlock.length);
blockIn.readBytes(metaBytesBlock, 0, metaBytesBlock.length);
}
public boolean hasFreqs() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return fieldInfo.hasPayloads();
}
@Override
public long size() {
return numTerms;
}
@Override
public long getSumTotalTermFreq() {
return sumTotalTermFreq;
}
@Override
public long getSumDocFreq() throws IOException {
return sumDocFreq;
}
@Override
public int getDocCount() throws IOException {
return docCount;
}
@Override
public TermsEnum iterator() throws IOException {
return new SegmentTermsEnum();
}
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
return new IntersectTermsEnum(compiled, startTerm);
}
@Override
public long ramBytesUsed() {
long ramBytesUsed = 0;
if (index != null) {
ramBytesUsed += index.ramBytesUsed();
ramBytesUsed += RamUsageEstimator.sizeOf(metaBytesBlock);
ramBytesUsed += RamUsageEstimator.sizeOf(metaLongsBlock);
ramBytesUsed += RamUsageEstimator.sizeOf(skipInfo);
ramBytesUsed += RamUsageEstimator.sizeOf(statsBlock);
}
return ramBytesUsed;
}
@Override
public Collection<Accountable> getChildResources() {
if (index == null) {
return Collections.emptyList();
} else {
return Collections.singletonList(Accountables.namedAccountable("terms", index));
}
}
@Override
public String toString() {
return "FSTOrdTerms(terms=" + numTerms + ",postings=" + sumDocFreq + ",positions=" + sumTotalTermFreq + ",docs=" + docCount + ")";
}
// Only wraps common operations for PBF interact
abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum {
/* Current term's ord, starts from 0 */
long ord;
/* Current term stats + decoded metadata (customized by PBF) */
final BlockTermState state;
/* Datainput to load stats & metadata */
final ByteArrayDataInput statsReader = new ByteArrayDataInput();
final ByteArrayDataInput metaLongsReader = new ByteArrayDataInput();
final ByteArrayDataInput metaBytesReader = new ByteArrayDataInput();
/* To which block is buffered */
int statsBlockOrd;
int metaBlockOrd;
/* Current buffered metadata (long[] & byte[]) */
long[][] longs;
int[] bytesStart;
int[] bytesLength;
/* Current buffered stats (df & ttf) */
int[] docFreq;
long[] totalTermFreq;
BaseTermsEnum() throws IOException {
this.state = postingsReader.newTermState();
this.statsReader.reset(statsBlock);
this.metaLongsReader.reset(metaLongsBlock);
this.metaBytesReader.reset(metaBytesBlock);
this.longs = new long[INTERVAL][longsSize];
this.bytesStart = new int[INTERVAL];
this.bytesLength = new int[INTERVAL];
this.docFreq = new int[INTERVAL];
this.totalTermFreq = new long[INTERVAL];
this.statsBlockOrd = -1;
this.metaBlockOrd = -1;
}
/** Decodes stats data into term state */
void decodeStats() throws IOException {
final int upto = (int)ord % INTERVAL;
final int oldBlockOrd = statsBlockOrd;
statsBlockOrd = (int)ord / INTERVAL;
if (oldBlockOrd != statsBlockOrd) {
refillStats();
}
state.docFreq = docFreq[upto];
state.totalTermFreq = totalTermFreq[upto];
}
/** Let PBF decode metadata */
void decodeMetaData() throws IOException {
final int upto = (int)ord % INTERVAL;
final int oldBlockOrd = metaBlockOrd;
metaBlockOrd = (int)ord / INTERVAL;
if (metaBlockOrd != oldBlockOrd) {
refillMetadata();
}
metaBytesReader.setPosition(bytesStart[upto]);
postingsReader.decodeTerm(longs[upto], metaBytesReader, fieldInfo, state, true);
}
/** Load current stats shard */
final void refillStats() throws IOException {
final int offset = statsBlockOrd * numSkipInfo;
final int statsFP = (int)skipInfo[offset];
statsReader.setPosition(statsFP);
for (int i = 0; i < INTERVAL && !statsReader.eof(); i++) {
int code = statsReader.readVInt();
if (hasFreqs()) {
docFreq[i] = (code >>> 1);
if ((code & 1) == 1) {
totalTermFreq[i] = docFreq[i];
} else {
totalTermFreq[i] = docFreq[i] + statsReader.readVLong();
}
} else {
docFreq[i] = code;
totalTermFreq[i] = code;
}
}
}
/** Load current metadata shard */
final void refillMetadata() throws IOException {
final int offset = metaBlockOrd * numSkipInfo;
final int metaLongsFP = (int)skipInfo[offset + 1];
final int metaBytesFP = (int)skipInfo[offset + 2];
metaLongsReader.setPosition(metaLongsFP);
for (int j = 0; j < longsSize; j++) {
longs[0][j] = skipInfo[offset + 3 + j] + metaLongsReader.readVLong();
}
bytesStart[0] = metaBytesFP;
bytesLength[0] = (int)metaLongsReader.readVLong();
for (int i = 1; i < INTERVAL && !metaLongsReader.eof(); i++) {
for (int j = 0; j < longsSize; j++) {
longs[i][j] = longs[i-1][j] + metaLongsReader.readVLong();
}
bytesStart[i] = bytesStart[i-1] + bytesLength[i-1];
bytesLength[i] = (int)metaLongsReader.readVLong();
}
}
@Override
public TermState termState() throws IOException {
decodeMetaData();
return state.clone();
}
@Override
public int docFreq() throws IOException {
return state.docFreq;
}
@Override
public long totalTermFreq() throws IOException {
return state.totalTermFreq;
}
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
decodeMetaData();
return postingsReader.postings(fieldInfo, state, reuse, flags);
}
@Override
public ImpactsEnum impacts(int flags) throws IOException {
decodeMetaData();
return postingsReader.impacts(fieldInfo, state, flags);
}
// TODO: this can be achieved by making use of Util.getByOutput()
// and should have related tests
@Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long ord() {
throw new UnsupportedOperationException();
}
}
// Iterates through all terms in this field
private final class SegmentTermsEnum extends BaseTermsEnum {
final BytesRefFSTEnum<Long> fstEnum;
/* Current term, null when enum ends or unpositioned */
BytesRef term;
/* True when current term's metadata is decoded */
boolean decoded;
/* True when current enum is 'positioned' by seekExact(TermState) */
boolean seekPending;
SegmentTermsEnum() throws IOException {
this.fstEnum = new BytesRefFSTEnum<>(index);
this.decoded = false;
this.seekPending = false;
}
@Override
public BytesRef term() throws IOException {
return term;
}
@Override
void decodeMetaData() throws IOException {
if (!decoded && !seekPending) {
super.decodeMetaData();
decoded = true;
}
}
// Update current enum according to FSTEnum
void updateEnum(final InputOutput<Long> pair) throws IOException {
if (pair == null) {
term = null;
} else {
term = pair.input;
ord = pair.output;
decodeStats();
}
decoded = false;
seekPending = false;
}
@Override
public BytesRef next() throws IOException {
if (seekPending) { // previously positioned, but termOutputs not fetched
seekPending = false;
SeekStatus status = seekCeil(term);
assert status == SeekStatus.FOUND; // must positioned on valid term
}
updateEnum(fstEnum.next());
return term;
}
@Override
public boolean seekExact(BytesRef target) throws IOException {
updateEnum(fstEnum.seekExact(target));
return term != null;
}
@Override
public SeekStatus seekCeil(BytesRef target) throws IOException {
updateEnum(fstEnum.seekCeil(target));
if (term == null) {
return SeekStatus.END;
} else {
return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
}
}
@Override
public void seekExact(BytesRef target, TermState otherState) {
if (!target.equals(term)) {
state.copyFrom(otherState);
term = BytesRef.deepCopyOf(target);
seekPending = true;
}
}
}
// Iterates intersect result with automaton (cannot seek!)
private final class IntersectTermsEnum extends BaseTermsEnum {
/* Current term, null when enum ends or unpositioned */
BytesRefBuilder term;
/* True when current term's metadata is decoded */
boolean decoded;
/* True when there is pending term when calling next() */
boolean pending;
/* stack to record how current term is constructed,
* used to accumulate metadata or rewind term:
* level == term.length + 1,
* == 0 when term is null */
Frame[] stack;
int level;
/* term dict fst */
final FST<Long> fst;
final FST.BytesReader fstReader;
final Outputs<Long> fstOutputs;
/* query automaton to intersect with */
final ByteRunAutomaton fsa;
private final class Frame {
/* fst stats */
FST.Arc<Long> arc;
Long output;
/* automaton stats */
int state;
Frame() {
this.arc = new FST.Arc<>();
this.state = -1;
}
public String toString() {
return "arc=" + arc + " state=" + state;
}
}
IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
//if (TEST) System.out.println("Enum init, startTerm=" + startTerm);
this.fst = index;
this.fstReader = fst.getBytesReader();
this.fstOutputs = index.outputs;
this.fsa = compiled.runAutomaton;
this.level = -1;
this.stack = new Frame[16];
for (int i = 0 ; i < stack.length; i++) {
this.stack[i] = new Frame();
}
Frame frame;
frame = loadVirtualFrame(newFrame());
this.level++;
frame = loadFirstFrame(newFrame());
pushFrame(frame);
this.decoded = false;
this.pending = false;
if (startTerm == null) {
pending = isAccept(topFrame());
} else {
doSeekCeil(startTerm);
pending = (term == null || !startTerm.equals(term.get())) && isValid(topFrame()) && isAccept(topFrame());
}
}
@Override
public BytesRef term() throws IOException {
return term == null ? null : term.get();
}
@Override
void decodeMetaData() throws IOException {
if (!decoded) {
super.decodeMetaData();
decoded = true;
}
}
@Override
void decodeStats() throws IOException {
ord = topFrame().output;
super.decodeStats();
}
@Override
public SeekStatus seekCeil(BytesRef target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public BytesRef next() throws IOException {
//if (TEST) System.out.println("Enum next()");
if (pending) {
pending = false;
decodeStats();
return term();
}
decoded = false;
DFS:
while (level > 0) {
Frame frame = newFrame();
if (loadExpandFrame(topFrame(), frame) != null) { // has valid target
pushFrame(frame);
if (isAccept(frame)) { // gotcha
break;
}
continue; // check next target
}
frame = popFrame();
while(level > 0) {
if (loadNextFrame(topFrame(), frame) != null) { // has valid sibling
pushFrame(frame);
if (isAccept(frame)) { // gotcha
break DFS;
}
continue DFS; // check next target
}
frame = popFrame();
}
return null;
}
decodeStats();
return term();
}
BytesRef doSeekCeil(BytesRef target) throws IOException {
//if (TEST) System.out.println("Enum doSeekCeil()");
Frame frame= null;
int label, upto = 0, limit = target.length;
while (upto < limit) { // to target prefix, or ceil label (rewind prefix)
frame = newFrame();
label = target.bytes[upto] & 0xff;
frame = loadCeilFrame(label, topFrame(), frame);
if (frame == null || frame.arc.label() != label) {
break;
}
assert isValid(frame); // target must be fetched from automaton
pushFrame(frame);
upto++;
}
if (upto == limit) { // got target
return term();
}
if (frame != null) { // got larger term('s prefix)
pushFrame(frame);
return isAccept(frame) ? term() : next();
}
while (level > 0) { // got target's prefix, advance to larger term
frame = popFrame();
while (level > 0 && !canRewind(frame)) {
frame = popFrame();
}
if (loadNextFrame(topFrame(), frame) != null) {
pushFrame(frame);
return isAccept(frame) ? term() : next();
}
}
return null;
}
/** Virtual frame, never pop */
Frame loadVirtualFrame(Frame frame) {
frame.output = fstOutputs.getNoOutput();
frame.state = -1;
return frame;
}
/** Load frame for start arc(node) on fst */
Frame loadFirstFrame(Frame frame) {
frame.arc = fst.getFirstArc(frame.arc);
frame.output = frame.arc.output();
frame.state = 0;
return frame;
}
/** Load frame for target arc(node) on fst */
Frame loadExpandFrame(Frame top, Frame frame) throws IOException {
if (!canGrow(top)) {
return null;
}
frame.arc = fst.readFirstRealTargetArc(top.arc.target(), frame.arc, fstReader);
frame.state = fsa.step(top.state, frame.arc.label());
frame.output = frame.arc.output();
//if (TEST) System.out.println(" loadExpand frame="+frame);
if (frame.state == -1) {
return loadNextFrame(top, frame);
}
return frame;
}
/** Load frame for sibling arc(node) on fst */
Frame loadNextFrame(Frame top, Frame frame) throws IOException {
if (!canRewind(frame)) {
return null;
}
while (!frame.arc.isLast()) {
frame.arc = fst.readNextRealArc(frame.arc, fstReader);
frame.output = frame.arc.output();
frame.state = fsa.step(top.state, frame.arc.label());
if (frame.state != -1) {
break;
}
}
//if (TEST) System.out.println(" loadNext frame="+frame);
if (frame.state == -1) {
return null;
}
return frame;
}
/** Load frame for target arc(node) on fst, so that
* arc.label &gt;= label and !fsa.reject(arc.label) */
Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
FST.Arc<Long> arc = frame.arc;
arc = Util.readCeilArc(label, fst, top.arc, arc, fstReader);
if (arc == null) {
return null;
}
frame.state = fsa.step(top.state, arc.label());
//if (TEST) System.out.println(" loadCeil frame="+frame);
if (frame.state == -1) {
return loadNextFrame(top, frame);
}
frame.output = arc.output();
return frame;
}
boolean isAccept(Frame frame) { // reach a term both fst&fsa accepts
return fsa.isAccept(frame.state) && frame.arc.isFinal();
}
boolean isValid(Frame frame) { // reach a prefix both fst&fsa won't reject
return /*frame != null &&*/ frame.state != -1;
}
boolean canGrow(Frame frame) { // can walk forward on both fst&fsa
return frame.state != -1 && FST.targetHasArcs(frame.arc);
}
boolean canRewind(Frame frame) { // can jump to sibling
return !frame.arc.isLast();
}
void pushFrame(Frame frame) {
final FST.Arc<Long> arc = frame.arc;
frame.output = fstOutputs.add(topFrame().output, frame.output);
term = grow(arc.label());
level++;
assert frame == stack[level];
}
Frame popFrame() {
term = shrink();
return stack[level--];
}
Frame newFrame() {
if (level+1 == stack.length) {
final Frame[] temp = new Frame[ArrayUtil.oversize(level+2, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(stack, 0, temp, 0, stack.length);
for (int i = stack.length; i < temp.length; i++) {
temp[i] = new Frame();
}
stack = temp;
}
return stack[level+1];
}
Frame topFrame() {
return stack[level];
}
BytesRefBuilder grow(int label) {
if (term == null) {
term = new BytesRefBuilder();
} else {
term.append((byte) label);
}
return term;
}
BytesRefBuilder shrink() {
if (term.length() == 0) {
term = null;
} else {
term.setLength(term.length() - 1);
}
return term;
}
}
}
static<T> void walk(FST<T> fst) throws IOException {
final ArrayList<FST.Arc<T>> queue = new ArrayList<>();
final BitSet seen = new BitSet();
final FST.BytesReader reader = fst.getBytesReader();
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
queue.add(startArc);
while (!queue.isEmpty()) {
final FST.Arc<T> arc = queue.remove(0);
final long node = arc.target();
//System.out.println(arc);
if (FST.targetHasArcs(arc) && !seen.get((int) node)) {
seen.set((int) node);
fst.readFirstRealTargetArc(node, arc, reader);
while (true) {
queue.add(new FST.Arc<T>().copyFrom(arc));
if (arc.isLast()) {
break;
} else {
fst.readNextRealArc(arc, reader);
}
}
}
}
}
@Override
public long ramBytesUsed() {
long ramBytesUsed = postingsReader.ramBytesUsed();
for (TermsReader r : fields.values()) {
ramBytesUsed += r.ramBytesUsed();
}
return ramBytesUsed;
}
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>(Accountables.namedAccountables("field", fields));
resources.add(Accountables.namedAccountable("delegate", postingsReader));
return Collections.unmodifiableList(resources);
}
@Override
public String toString() {
return getClass().getSimpleName() + "(fields=" + fields.size() + ",delegate=" + postingsReader + ")";
}
@Override
public void checkIntegrity() throws IOException {
postingsReader.checkIntegrity();
}
}

View File

@ -1,386 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
/**
* FST-based term dict, using ord as FST output.
*
* The FST holds the mapping between &lt;term, ord&gt;, and
* term's metadata is delta encoded into a single byte block.
*
* Typically the byte block consists of four parts:
* 1. term statistics: docFreq, totalTermFreq;
* 2. monotonic long[], e.g. the pointer to the postings list for that term;
* 3. generic byte[], e.g. other information customized by postings base.
* 4. single-level skip list to speed up metadata decoding by ord.
*
* <p>
* Files:
* <ul>
* <li><tt>.tix</tt>: <a href="#Termindex">Term Index</a></li>
* <li><tt>.tbk</tt>: <a href="#Termblock">Term Block</a></li>
* </ul>
*
* <a name="Termindex"></a>
* <h3>Term Index</h3>
* <p>
* The .tix contains a list of FSTs, one for each field.
* The FST maps a term to its corresponding order in current field.
* </p>
*
* <ul>
* <li>TermIndex(.tix) --&gt; Header, TermFST<sup>NumFields</sup>, Footer</li>
* <li>TermFST --&gt; {@link FST FST&lt;long&gt;}</li>
* <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
*
* <p>Notes:</p>
* <ul>
* <li>
* Since terms are already sorted before writing to <a href="#Termblock">Term Block</a>,
* their ords can directly used to seek term metadata from term block.
* </li>
* </ul>
*
* <a name="Termblock"></a>
* <h3>Term Block</h3>
* <p>
* The .tbk contains all the statistics and metadata for terms, along with field summary (e.g.
* per-field data like number of documents in current field). For each field, there are four blocks:
* <ul>
* <li>statistics bytes block: contains term statistics; </li>
* <li>metadata longs block: delta-encodes monotonic part of metadata; </li>
* <li>metadata bytes block: encodes other parts of metadata; </li>
* <li>skip block: contains skip data, to speed up metadata seeking and decoding</li>
* </ul>
*
* <p>File Format:</p>
* <ul>
* <li>TermBlock(.tbk) --&gt; Header, <i>PostingsHeader</i>, FieldSummary, DirOffset</li>
* <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
* DocCount, LongsSize, DataBlock &gt; <sup>NumFields</sup>, Footer</li>
*
* <li>DataBlock --&gt; StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
* SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock </li>
* <li>SkipBlock --&gt; &lt; StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta,
* MetaLongsSkipDelta<sup>LongsSize</sup> &gt;<sup>NumTerms</sup>
* <li>StatsBlock --&gt; &lt; DocFreq[Same?], (TotalTermFreq-DocFreq) ? &gt; <sup>NumTerms</sup>
* <li>MetaLongsBlock --&gt; &lt; LongDelta<sup>LongsSize</sup>, BytesSize &gt; <sup>NumTerms</sup>
* <li>MetaBytesBlock --&gt; Byte <sup>MetaBytesBlockLength</sup>
* <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}</li>
* <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
* <li>NumFields, FieldNumber, DocCount, DocFreq, LongsSize,
* FieldNumber, DocCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
* StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
* LongDelta,--&gt; {@link DataOutput#writeVLong VLong}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* <p>Notes: </p>
* <ul>
* <li>
* The format of PostingsHeader and MetaBytes are customized by the specific postings implementation:
* they contain arbitrary per-file data (such as parameters or versioning information), and per-term data
* (non-monotonic ones like pulsed postings data).
* </li>
* <li>
* During initialization the reader will load all the blocks into memory. SkipBlock will be decoded, so that during seek
* term dict can lookup file pointers directly. StatsFPDelta, MetaLongsSkipFPDelta, etc. are file offset
* for every SkipInterval's term. MetaLongsSkipDelta is the difference from previous one, which indicates
* the value of preceding metadata longs for every SkipInterval's term.
* </li>
* <li>
* DocFreq is the count of documents which contain the term. TotalTermFreq is the total number of occurrences of the term.
* Usually these two values are the same for long tail terms, therefore one bit is stole from DocFreq to check this case,
* so that encoding of TotalTermFreq may be omitted.
* </li>
* </ul>
*
* @lucene.experimental
*/
public class FSTOrdTermsWriter extends FieldsConsumer {
static final String TERMS_INDEX_EXTENSION = "tix";
static final String TERMS_BLOCK_EXTENSION = "tbk";
static final String TERMS_CODEC_NAME = "FSTOrdTerms";
static final String TERMS_INDEX_CODEC_NAME = "FSTOrdIndex";
public static final int VERSION_START = 2;
public static final int VERSION_CURRENT = VERSION_START;
public static final int SKIP_INTERVAL = 8;
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
final int maxDoc;
final List<FieldMetaData> fields = new ArrayList<>();
IndexOutput blockOut = null;
IndexOutput indexOut = null;
public FSTOrdTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_BLOCK_EXTENSION);
this.postingsWriter = postingsWriter;
this.fieldInfos = state.fieldInfos;
this.maxDoc = state.segmentInfo.maxDoc();
boolean success = false;
try {
this.indexOut = state.directory.createOutput(termsIndexFileName, state.context);
this.blockOut = state.directory.createOutput(termsBlockFileName, state.context);
CodecUtil.writeIndexHeader(indexOut, TERMS_INDEX_CODEC_NAME, VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
CodecUtil.writeIndexHeader(blockOut, TERMS_CODEC_NAME, VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentSuffix);
this.postingsWriter.init(blockOut, state);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(indexOut, blockOut);
}
}
}
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
for(String field : fields) {
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
boolean hasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
TermsEnum termsEnum = terms.iterator();
TermsWriter termsWriter = new TermsWriter(fieldInfo);
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
FixedBitSet docsSeen = new FixedBitSet(maxDoc);
while (true) {
BytesRef term = termsEnum.next();
if (term == null) {
break;
}
BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms);
if (termState != null) {
termsWriter.finishTerm(term, termState);
sumTotalTermFreq += termState.totalTermFreq;
sumDocFreq += termState.docFreq;
}
}
termsWriter.finish(hasFreq ? sumTotalTermFreq : -1, sumDocFreq, docsSeen.cardinality());
}
}
@Override
public void close() throws IOException {
if (blockOut != null) {
boolean success = false;
try {
final long blockDirStart = blockOut.getFilePointer();
// write field summary
blockOut.writeVInt(fields.size());
for (FieldMetaData field : fields) {
blockOut.writeVInt(field.fieldInfo.number);
blockOut.writeVLong(field.numTerms);
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
blockOut.writeVLong(field.sumTotalTermFreq);
}
blockOut.writeVLong(field.sumDocFreq);
blockOut.writeVInt(field.docCount);
blockOut.writeVInt(field.longsSize);
blockOut.writeVLong(field.statsOut.size());
blockOut.writeVLong(field.metaLongsOut.size());
blockOut.writeVLong(field.metaBytesOut.size());
field.skipOut.copyTo(blockOut);
field.statsOut.copyTo(blockOut);
field.metaLongsOut.copyTo(blockOut);
field.metaBytesOut.copyTo(blockOut);
field.dict.save(indexOut);
}
writeTrailer(blockOut, blockDirStart);
CodecUtil.writeFooter(indexOut);
CodecUtil.writeFooter(blockOut);
success = true;
} finally {
if (success) {
IOUtils.close(blockOut, indexOut, postingsWriter);
} else {
IOUtils.closeWhileHandlingException(blockOut, indexOut, postingsWriter);
}
blockOut = null;
}
}
}
private void writeTrailer(IndexOutput out, long dirStart) throws IOException {
out.writeLong(dirStart);
}
private static class FieldMetaData {
public FieldInfo fieldInfo;
public long numTerms;
public long sumTotalTermFreq;
public long sumDocFreq;
public int docCount;
public int longsSize;
public FST<Long> dict;
// TODO: block encode each part
// vint encode next skip point (fully decoded when reading)
public ByteBuffersDataOutput skipOut;
// vint encode df, (ttf-df)
public ByteBuffersDataOutput statsOut;
// vint encode monotonic long[] and length for corresponding byte[]
public ByteBuffersDataOutput metaLongsOut;
// generic byte[]
public ByteBuffersDataOutput metaBytesOut;
}
final class TermsWriter {
private final FSTCompiler<Long> fstCompiler;
private final PositiveIntOutputs outputs;
private final FieldInfo fieldInfo;
private final int longsSize;
private long numTerms;
private final IntsRefBuilder scratchTerm = new IntsRefBuilder();
private final ByteBuffersDataOutput statsOut = new ByteBuffersDataOutput();
private final ByteBuffersDataOutput metaLongsOut = new ByteBuffersDataOutput();
private final ByteBuffersDataOutput metaBytesOut = new ByteBuffersDataOutput();
private final ByteBuffersDataOutput skipOut = new ByteBuffersDataOutput();
private long lastBlockStatsFP;
private long lastBlockMetaLongsFP;
private long lastBlockMetaBytesFP;
private long[] lastBlockLongs;
private long[] lastLongs;
private long lastMetaBytesFP;
TermsWriter(FieldInfo fieldInfo) {
this.numTerms = 0;
this.fieldInfo = fieldInfo;
this.longsSize = postingsWriter.setField(fieldInfo);
this.outputs = PositiveIntOutputs.getSingleton();
this.fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
this.lastBlockStatsFP = 0;
this.lastBlockMetaLongsFP = 0;
this.lastBlockMetaBytesFP = 0;
this.lastBlockLongs = new long[longsSize];
this.lastLongs = new long[longsSize];
this.lastMetaBytesFP = 0;
}
public void finishTerm(BytesRef text, BlockTermState state) throws IOException {
if (numTerms > 0 && numTerms % SKIP_INTERVAL == 0) {
bufferSkip();
}
// write term meta data into fst
final long longs[] = new long[longsSize];
final long delta = state.totalTermFreq - state.docFreq;
if (state.totalTermFreq > 0) {
if (delta == 0) {
statsOut.writeVInt(state.docFreq<<1|1);
} else {
statsOut.writeVInt(state.docFreq<<1);
statsOut.writeVLong(state.totalTermFreq-state.docFreq);
}
} else {
statsOut.writeVInt(state.docFreq);
}
postingsWriter.encodeTerm(longs, metaBytesOut, fieldInfo, state, true);
for (int i = 0; i < longsSize; i++) {
metaLongsOut.writeVLong(longs[i] - lastLongs[i]);
lastLongs[i] = longs[i];
}
metaLongsOut.writeVLong(metaBytesOut.size() - lastMetaBytesFP);
fstCompiler.add(Util.toIntsRef(text, scratchTerm), numTerms);
numTerms++;
lastMetaBytesFP = metaBytesOut.size();
}
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
if (numTerms > 0) {
final FieldMetaData metadata = new FieldMetaData();
metadata.fieldInfo = fieldInfo;
metadata.numTerms = numTerms;
metadata.sumTotalTermFreq = sumTotalTermFreq;
metadata.sumDocFreq = sumDocFreq;
metadata.docCount = docCount;
metadata.longsSize = longsSize;
metadata.skipOut = skipOut;
metadata.statsOut = statsOut;
metadata.metaLongsOut = metaLongsOut;
metadata.metaBytesOut = metaBytesOut;
metadata.dict = fstCompiler.compile();
fields.add(metadata);
}
}
private void bufferSkip() throws IOException {
skipOut.writeVLong(statsOut.size() - lastBlockStatsFP);
skipOut.writeVLong(metaLongsOut.size() - lastBlockMetaLongsFP);
skipOut.writeVLong(metaBytesOut.size() - lastBlockMetaBytesFP);
for (int i = 0; i < longsSize; i++) {
skipOut.writeVLong(lastLongs[i] - lastBlockLongs[i]);
}
lastBlockStatsFP = statsOut.size();
lastBlockMetaLongsFP = metaLongsOut.size();
lastBlockMetaBytesFP = metaBytesOut.size();
System.arraycopy(lastLongs, 0, lastBlockLongs, 0, longsSize);
}
}
}

View File

@ -41,7 +41,6 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
private final static TermData NO_OUTPUT = new TermData();
//private static boolean TEST = false;
private final boolean hasPos;
private final int longsSize;
/**
* Represents the metadata for one term.
@ -50,18 +49,15 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
*/
static class TermData implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermData.class);
long[] longs;
byte[] bytes;
int docFreq;
long totalTermFreq;
TermData() {
this.longs = null;
this.bytes = null;
this.docFreq = 0;
this.totalTermFreq = -1;
}
TermData(long[] longs, byte[] bytes, int docFreq, long totalTermFreq) {
this.longs = longs;
TermData(byte[] bytes, int docFreq, long totalTermFreq) {
this.bytes = bytes;
this.docFreq = docFreq;
this.totalTermFreq = totalTermFreq;
@ -70,9 +66,6 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
@Override
public long ramBytesUsed() {
long ramBytesUsed = BASE_RAM_BYTES_USED;
if (longs != null) {
ramBytesUsed += RamUsageEstimator.sizeOf(longs);
}
if (bytes != null) {
ramBytesUsed += RamUsageEstimator.sizeOf(bytes);
}
@ -85,14 +78,7 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
@Override
public int hashCode() {
int hash = 0;
if (longs != null) {
final int end = longs.length;
for (int i = 0; i < end; i++) {
hash -= longs[i];
}
}
if (bytes != null) {
hash = -hash;
final int end = bytes.length;
for (int i = 0; i < end; i++) {
hash += bytes[i];
@ -104,7 +90,7 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
@Override
public String toString() {
return "FSTTermOutputs$TermData longs=" + Arrays.toString(longs) + " bytes=" + Arrays.toString(bytes) + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq;
return "FSTTermOutputs$TermData bytes=" + Arrays.toString(bytes) + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq;
}
@Override
@ -116,15 +102,13 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
}
TermData other = (TermData) other_;
return statsEqual(this, other) &&
longsEqual(this, other) &&
bytesEqual(this, other);
}
}
protected FSTTermOutputs(FieldInfo fieldInfo, int longsSize) {
protected FSTTermOutputs(FieldInfo fieldInfo) {
this.hasPos = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
this.longsSize = longsSize;
}
@Override
@ -145,37 +129,13 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
//if (TEST) System.out.println("ret:"+NO_OUTPUT);
return NO_OUTPUT;
}
assert t1.longs.length == t2.longs.length;
long[] min = t1.longs, max = t2.longs;
int pos = 0;
TermData ret;
while (pos < longsSize && min[pos] == max[pos]) {
pos++;
}
if (pos < longsSize) { // unequal long[]
if (min[pos] > max[pos]) {
min = t2.longs;
max = t1.longs;
}
// check whether strictly smaller
while (pos < longsSize && min[pos] <= max[pos]) {
pos++;
}
if (pos < longsSize || allZero(min)) { // not comparable or all-zero
ret = NO_OUTPUT;
} else {
ret = new TermData(min, null, 0, -1);
}
} else { // equal long[]
if (statsEqual(t1, t2) && bytesEqual(t1, t2)) {
ret = t1;
} else if (allZero(min)) {
ret = NO_OUTPUT;
} else {
ret = new TermData(min, null, 0, -1);
}
if (statsEqual(t1, t2) && bytesEqual(t1, t2)) {
ret = t1;
} else {
ret = NO_OUTPUT;
}
//if (TEST) System.out.println("ret:"+ret);
return ret;
@ -188,23 +148,12 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
//if (TEST) System.out.println("ret:"+t1);
return t1;
}
assert t1.longs.length == t2.longs.length;
int pos = 0;
long diff = 0;
long[] share = new long[longsSize];
while (pos < longsSize) {
share[pos] = t1.longs[pos] - t2.longs[pos];
diff += share[pos];
pos++;
}
TermData ret;
if (diff == 0 && statsEqual(t1, t2) && bytesEqual(t1, t2)) {
if (statsEqual(t1, t2) && bytesEqual(t1, t2)) {
ret = NO_OUTPUT;
} else {
ret = new TermData(share, t1.bytes, t1.docFreq, t1.totalTermFreq);
ret = new TermData(t1.bytes, t1.docFreq, t1.totalTermFreq);
}
//if (TEST) System.out.println("ret:"+ret);
return ret;
@ -223,21 +172,12 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
//if (TEST) System.out.println("ret:"+t1);
return t1;
}
assert t1.longs.length == t2.longs.length;
int pos = 0;
long[] accum = new long[longsSize];
while (pos < longsSize) {
accum[pos] = t1.longs[pos] + t2.longs[pos];
pos++;
}
TermData ret;
if (t2.bytes != null || t2.docFreq > 0) {
ret = new TermData(accum, t2.bytes, t2.docFreq, t2.totalTermFreq);
ret = new TermData(t2.bytes, t2.docFreq, t2.totalTermFreq);
} else {
ret = new TermData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq);
ret = new TermData(t1.bytes, t1.docFreq, t1.totalTermFreq);
}
//if (TEST) System.out.println("ret:"+ret);
return ret;
@ -246,13 +186,12 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
@Override
public void write(TermData data, DataOutput out) throws IOException {
assert hasPos || data.totalTermFreq == -1;
int bit0 = allZero(data.longs) ? 0 : 1;
int bit1 = ((data.bytes == null || data.bytes.length == 0) ? 0 : 1) << 1;
int bit2 = ((data.docFreq == 0) ? 0 : 1) << 2;
int bits = bit0 | bit1 | bit2;
if (bit1 > 0) { // determine extra length
int bit0 = ((data.bytes == null || data.bytes.length == 0) ? 0 : 1);
int bit1 = ((data.docFreq == 0) ? 0 : 1) << 1;
int bits = bit0 | bit1;
if (bit0 > 0) { // determine extra length
if (data.bytes.length < 32) {
bits |= (data.bytes.length << 3);
bits |= (data.bytes.length << 2);
out.writeByte((byte)bits);
} else {
out.writeByte((byte)bits);
@ -261,15 +200,10 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
} else {
out.writeByte((byte)bits);
}
if (bit0 > 0) { // not all-zero case
for (int pos = 0; pos < longsSize; pos++) {
out.writeVLong(data.longs[pos]);
}
}
if (bit1 > 0) { // bytes exists
if (bit0 > 0) { // bytes exists
out.writeBytes(data.bytes, 0, data.bytes.length);
}
if (bit2 > 0) { // stats exist
if (bit1 > 0) { // stats exist
if (hasPos) {
if (data.docFreq == data.totalTermFreq) {
out.writeVInt((data.docFreq << 1) | 1);
@ -285,28 +219,21 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
@Override
public TermData read(DataInput in) throws IOException {
long[] longs = new long[longsSize];
byte[] bytes = null;
int docFreq = 0;
long totalTermFreq = -1;
int bits = in.readByte() & 0xff;
int bit0 = bits & 1;
int bit1 = bits & 2;
int bit2 = bits & 4;
int bytesSize = (bits >>> 3);
if (bit1 > 0 && bytesSize == 0) { // determine extra length
int bytesSize = (bits >>> 2);
if (bit0 > 0 && bytesSize == 0) { // determine extra length
bytesSize = in.readVInt();
}
if (bit0 > 0) { // not all-zero case
for (int pos = 0; pos < longsSize; pos++) {
longs[pos] = in.readVLong();
}
}
if (bit1 > 0) { // bytes exists
if (bit0 > 0) { // bytes exists
bytes = new byte[bytesSize];
in.readBytes(bytes, 0, bytesSize);
}
if (bit2 > 0) { // stats exist
if (bit1 > 0) { // stats exist
int code = in.readVInt();
if (hasPos) {
totalTermFreq = docFreq = code >>> 1;
@ -317,7 +244,7 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
docFreq = code;
}
}
return new TermData(longs, bytes, docFreq, totalTermFreq);
return new TermData(bytes, docFreq, totalTermFreq);
}
@ -326,20 +253,14 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
int bits = in.readByte() & 0xff;
int bit0 = bits & 1;
int bit1 = bits & 2;
int bit2 = bits & 4;
int bytesSize = (bits >>> 3);
if (bit1 > 0 && bytesSize == 0) { // determine extra length
int bytesSize = (bits >>> 2);
if (bit0 > 0 && bytesSize == 0) { // determine extra length
bytesSize = in.readVInt();
}
if (bit0 > 0) { // not all-zero case
for (int pos = 0; pos < longsSize; pos++) {
in.readVLong();
}
}
if (bit1 > 0) { // bytes exists
if (bit0 > 0) { // bytes exists
in.skipBytes(bytesSize);
}
if (bit2 > 0) { // stats exist
if (bit1 > 0) { // stats exist
int code = in.readVInt();
if (hasPos && (code & 1) == 0) {
in.readVLong();
@ -366,18 +287,4 @@ class FSTTermOutputs extends Outputs<FSTTermOutputs.TermData> {
}
return t1.bytes != null && t2.bytes != null && Arrays.equals(t1.bytes, t2.bytes);
}
static boolean longsEqual(final TermData t1, final TermData t2) {
if (t1.longs == null && t2.longs == null) {
return true;
}
return t1.longs != null && t2.longs != null && Arrays.equals(t1.longs, t2.longs);
}
static boolean allZero(final long[] l) {
for (int i = 0; i < l.length; i++) {
if (l[i] != 0) {
return false;
}
}
return true;
}
}
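
As a side note on the write/read methods in this file: with the long[] gone, the per-term header byte now packs only two flags plus an optional inline length. Below is a minimal standalone sketch of that packing; it is illustrative only (the class name and the example values are invented, not code from this commit).

// Minimal sketch of the flag byte FSTTermOutputs writes once the long[] is removed.
// bit 0: metadata bytes present, bit 1: stats present, bits 2..7: inline byte[] length (< 32).
public class FlagByteSketch {
  public static void main(String[] args) {
    int bytesLength = 5;                        // length of this term's metadata byte[]
    boolean hasBytes = bytesLength > 0;
    boolean hasStats = true;                    // docFreq != 0
    int bits = (hasBytes ? 1 : 0) | ((hasStats ? 1 : 0) << 1);
    if (hasBytes && bytesLength < 32) {
      bits |= bytesLength << 2;                 // short lengths are inlined in the upper six bits
    }
    System.out.println(Integer.toBinaryString(bits)); // prints 10111: bytes + stats, length 5 inlined
    // When bytesLength >= 32 the upper bits stay zero and the real length follows as a VInt.
  }
}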

View File

@ -99,8 +99,7 @@ public class FSTTermsReader extends FieldsProducer {
// if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
int docCount = in.readVInt();
int longsSize = in.readVInt();
TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize);
TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
TermsReader previous = fields.put(fieldInfo.name, current);
checkFieldSummary(state.segmentInfo, in, current, previous);
}
@ -169,17 +168,15 @@ public class FSTTermsReader extends FieldsProducer {
final long sumTotalTermFreq;
final long sumDocFreq;
final int docCount;
final int longsSize;
final FST<FSTTermOutputs.TermData> dict;
TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException {
TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
this.dict = new FST<>(in, new FSTTermOutputs(fieldInfo, longsSize));
this.dict = new FST<>(in, new FSTTermOutputs(fieldInfo));
}
@Override
@ -349,7 +346,7 @@ public class FSTTermsReader extends FieldsProducer {
if (meta.bytes != null) {
bytesReader.reset(meta.bytes, 0, meta.bytes.length);
}
postingsReader.decodeTerm(meta.longs, bytesReader, fieldInfo, state, true);
postingsReader.decodeTerm(bytesReader, fieldInfo, state, true);
decoded = true;
}
}
@ -495,7 +492,7 @@ public class FSTTermsReader extends FieldsProducer {
if (meta.bytes != null) {
bytesReader.reset(meta.bytes, 0, meta.bytes.length);
}
postingsReader.decodeTerm(meta.longs, bytesReader, fieldInfo, state, true);
postingsReader.decodeTerm(bytesReader, fieldInfo, state, true);
decoded = true;
}
}

View File

@ -209,7 +209,6 @@ public class FSTTermsWriter extends FieldsConsumer {
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
out.writeVInt(field.longsSize);
field.dict.save(out);
}
writeTrailer(out, dirStart);
@ -232,16 +231,14 @@ public class FSTTermsWriter extends FieldsConsumer {
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
public final int longsSize;
public final FST<FSTTermOutputs.TermData> dict;
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST<FSTTermOutputs.TermData> fst) {
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, FST<FSTTermOutputs.TermData> fst) {
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
this.dict = fst;
}
}
@ -250,7 +247,6 @@ public class FSTTermsWriter extends FieldsConsumer {
private final FSTCompiler<FSTTermOutputs.TermData> fstCompiler;
private final FSTTermOutputs outputs;
private final FieldInfo fieldInfo;
private final int longsSize;
private long numTerms;
private final IntsRefBuilder scratchTerm = new IntsRefBuilder();
@ -259,19 +255,18 @@ public class FSTTermsWriter extends FieldsConsumer {
TermsWriter(FieldInfo fieldInfo) {
this.numTerms = 0;
this.fieldInfo = fieldInfo;
this.longsSize = postingsWriter.setField(fieldInfo);
this.outputs = new FSTTermOutputs(fieldInfo, longsSize);
postingsWriter.setField(fieldInfo);
this.outputs = new FSTTermOutputs(fieldInfo);
this.fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
}
public void finishTerm(BytesRef text, BlockTermState state) throws IOException {
// write term meta data into fst
final FSTTermOutputs.TermData meta = new FSTTermOutputs.TermData();
meta.longs = new long[longsSize];
meta.bytes = null;
meta.docFreq = state.docFreq;
meta.totalTermFreq = state.totalTermFreq;
postingsWriter.encodeTerm(meta.longs, metaWriter, fieldInfo, state, true);
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, true);
if (metaWriter.size() > 0) {
meta.bytes = metaWriter.toArrayCopy();
metaWriter.reset();
@ -284,7 +279,7 @@ public class FSTTermsWriter extends FieldsConsumer {
// save FST dict
if (numTerms > 0) {
final FST<FSTTermOutputs.TermData> fst = fstCompiler.compile();
fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst));
fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, fst));
}
}
}
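
To make the finishTerm pattern above easier to follow, here is a small hedged sketch of capturing a postings writer's per-term metadata as an opaque byte[] now that encodeTerm writes straight to a DataOutput. The helper class is invented for illustration; the Lucene types and the ByteBuffersDataOutput calls are the real ones used in the diff.

import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.store.ByteBuffersDataOutput;

// Hypothetical helper mirroring finishTerm: per-term metadata is encoded into a
// resettable DataOutput and handed back as an opaque byte[] (or null when empty).
final class TermMetadataCapture {
  private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();

  byte[] capture(PostingsWriterBase postingsWriter, FieldInfo fieldInfo, BlockTermState state) throws IOException {
    postingsWriter.encodeTerm(metaWriter, fieldInfo, state, true); // absolute encoding, one term at a time
    byte[] bytes = metaWriter.size() > 0 ? metaWriter.toArrayCopy() : null;
    metaWriter.reset();                                            // reuse the buffer for the next term
    return bytes;
  }
}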

View File

@ -94,7 +94,7 @@ public class DeltaBaseTermStateSerializer implements Accountable {
/**
* Writes a {@link BlockTermState} to the provided {@link DataOutput}.
* <p>
* Simpler variant of {@link Lucene84PostingsWriter#encodeTerm(long[], DataOutput, FieldInfo, BlockTermState, boolean)}.
* Simpler variant of {@link Lucene84PostingsWriter#encodeTerm(DataOutput, FieldInfo, BlockTermState, boolean)}.
*/
public void writeTermState(DataOutput termStatesOutput, FieldInfo fieldInfo, BlockTermState termState) throws IOException {
IndexOptions indexOptions = fieldInfo.getIndexOptions();
@ -143,7 +143,7 @@ public class DeltaBaseTermStateSerializer implements Accountable {
/**
* Reads a {@link BlockTermState} from the provided {@link DataInput}.
* <p>
* Simpler variant of {@link Lucene84PostingsReader#decodeTerm(long[], DataInput, FieldInfo, BlockTermState, boolean)}.
* Simpler variant of {@link Lucene84PostingsReader#decodeTerm(DataInput, FieldInfo, BlockTermState, boolean)}.
*
* @param reuse {@link BlockTermState} to reuse; or null to create a new one.
*/

View File

@ -16,7 +16,6 @@
org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat
org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat
org.apache.lucene.codecs.memory.DirectPostingsFormat
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
org.apache.lucene.codecs.memory.FSTPostingsFormat
org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat
org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPostingsFormat

View File

@ -1,34 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil;
/**
* Tests FSTOrdPostingsFormat
*/
public class TestFSTOrdPostingsFormat extends BasePostingsFormatTestCase {
private final Codec codec = TestUtil.alwaysPostingsFormat(new FSTOrdPostingsFormat());
@Override
protected Codec getCodec() {
return codec;
}
}

View File

@ -159,7 +159,7 @@ public class TestTermBytesComparator extends LuceneTestCase {
}
@Override
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) {
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) {
}
@Override

View File

@ -268,7 +268,7 @@ public class STBlockReaderTest extends LuceneTestCase {
}
@Override
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) {
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) {
}
@Override

View File

@ -61,7 +61,7 @@ public abstract class PostingsReaderBase implements Closeable, Accountable {
/** Actually decode metadata for next term
* @see PostingsWriterBase#encodeTerm
*/
public abstract void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
public abstract void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
/** Must fully consume state, since after this call that
* TermState may be reused. */

View File

@ -68,21 +68,12 @@ public abstract class PostingsWriterBase implements Closeable {
* Usually elements in {@code longs} are file pointers, so each one always
* increases when a new term is consumed. {@code out} is used to write generic
* bytes, which are not monotonic.
*
* NOTE: sometimes long[] might contain "don't care" values that are unused, e.g.
* the pointer to postings list may not be defined for some terms but is defined
* for others, if it is designed to inline some postings data in term dictionary.
* In this case, the postings writer should always use the last value, so that each
* element in metadata long[] remains monotonic.
*/
public abstract void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
public abstract void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
/**
* Sets the current field for writing, and returns the
* fixed length of long[] metadata (which is fixed per
* field), called when the writing switches to another field. */
// TODO: better name?
public abstract int setField(FieldInfo fieldInfo);
* Sets the current field for writing. */
public abstract void setField(FieldInfo fieldInfo);
@Override
public abstract void close() throws IOException;
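
To make the revised encodeTerm/decodeTerm contract concrete, here is a minimal writer/reader sketch. It is illustrative only: MyTermState, its docStartFP field, and the lastDocStartFP field are invented names, the surrounding class boilerplate is omitted, and the reader half would live in the matching PostingsReaderBase subclass. The point is that the file-pointer delta that used to go into longs[0] is now written and read back directly as a VLong.

// Writer side (PostingsWriterBase subclass): metadata goes straight into the DataOutput.
@Override
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
  MyTermState state = (MyTermState) _state;     // hypothetical BlockTermState subclass
  if (absolute) {
    lastDocStartFP = 0;                         // reset the delta base for an absolute encoding
  }
  out.writeVLong(state.docStartFP - lastDocStartFP);
  lastDocStartFP = state.docStartFP;
}

// Reader side (matching PostingsReaderBase subclass): the same VLong delta is consumed in order.
@Override
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
  MyTermState state = (MyTermState) _state;
  if (absolute) {
    state.docStartFP = 0;
  }
  state.docStartFP += in.readVLong();
}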

View File

@ -87,7 +87,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
* fixed length of long[] metadata (which is fixed per
* field), called when the writing switches to another field. */
@Override
public int setField(FieldInfo fieldInfo) {
public void setField(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
indexOptions = fieldInfo.getIndexOptions();
@ -113,8 +113,6 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
enumFlags = PostingsEnum.OFFSETS;
}
}
return 0;
}
@Override

View File

@ -128,8 +128,11 @@ public final class BlockTreeTermsReader extends FieldsProducer {
/** Auto-prefix terms have been superseded by points. */
public static final int VERSION_AUTO_PREFIX_TERMS_REMOVED = 3;
/** The long[] + byte[] metadata has been replaced with a single byte[]. */
public static final int VERSION_META_LONGS_REMOVED = 4;
/** Current terms format. */
public static final int VERSION_CURRENT = VERSION_AUTO_PREFIX_TERMS_REMOVED;
public static final int VERSION_CURRENT = VERSION_META_LONGS_REMOVED;
/** Extension of terms index file */
static final String TERMS_INDEX_EXTENSION = "tip";
@ -212,9 +215,11 @@ public final class BlockTreeTermsReader extends FieldsProducer {
// when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written.
final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : termsIn.readVLong();
final int docCount = termsIn.readVInt();
final int longsSize = termsIn.readVInt();
if (longsSize < 0) {
throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn);
if (version < VERSION_META_LONGS_REMOVED) {
final int longsSize = termsIn.readVInt();
if (longsSize < 0) {
throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn);
}
}
BytesRef minTerm = readBytesRef(termsIn);
BytesRef maxTerm = readBytesRef(termsIn);
@ -231,7 +236,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
final long indexStartFP = indexIn.readVLong();
FieldReader previous = fieldMap.put(fieldInfo.name,
new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
indexStartFP, longsSize, indexIn, minTerm, maxTerm, state.openedFromWriter, perFieldLoadMode));
indexStartFP, indexIn, minTerm, maxTerm, state.openedFromWriter, perFieldLoadMode));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
}

View File

@ -224,11 +224,10 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
private final int longsSize;
public final BytesRef minTerm;
public final BytesRef maxTerm;
public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize,
public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount,
BytesRef minTerm, BytesRef maxTerm) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
@ -239,7 +238,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
}
@ -509,7 +507,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
class TermsWriter {
private final FieldInfo fieldInfo;
private final int longsSize;
private long numTerms;
final FixedBitSet docsSeen;
long sumTotalTermFreq;
@ -524,8 +521,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
private final BytesRefBuilder lastTerm = new BytesRefBuilder();
private int[] prefixStarts = new int[8];
private final long[] longs;
// Pending stack of terms and blocks. As terms arrive (in sorted order)
// we append to this stack, and once the top of the stack has enough
// terms starting with a common prefix, we write a new block with
@ -720,13 +715,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
// Write term meta data
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
assert longs[pos] >= 0;
metaWriter.writeVLong(longs[pos]);
}
bytesWriter.copyTo(metaWriter);
bytesWriter.reset();
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;
}
} else {
@ -771,13 +760,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
// separate anymore:
// Write term meta data
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
assert longs[pos] >= 0;
metaWriter.writeVLong(longs[pos]);
}
bytesWriter.copyTo(metaWriter);
bytesWriter.reset();
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;
} else {
PendingBlock block = (PendingBlock) ent;
@ -845,9 +828,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
this.fieldInfo = fieldInfo;
assert fieldInfo.getIndexOptions() != IndexOptions.NONE;
docsSeen = new FixedBitSet(maxDoc);
this.longsSize = postingsWriter.setField(fieldInfo);
this.longs = new long[longsSize];
postingsWriter.setField(fieldInfo);
}
/** Writes one term's worth of postings. */
@ -964,7 +945,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
sumTotalTermFreq,
sumDocFreq,
docsSeen.cardinality(),
longsSize,
minTerm, maxTerm));
} else {
assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS && sumTotalTermFreq == -1;
@ -976,7 +956,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
private final ByteBuffersDataOutput suffixWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput statsWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance();
}
private boolean closed;
@ -1009,7 +988,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
}
termsOut.writeVLong(field.sumDocFreq);
termsOut.writeVInt(field.docCount);
termsOut.writeVInt(field.longsSize);
indexOut.writeVLong(field.indexStartFP);
writeBytesRef(termsOut, field.minTerm);
writeBytesRef(termsOut, field.maxTerm);

View File

@ -58,7 +58,6 @@ public final class FieldReader extends Terms implements Accountable {
final BytesRef rootCode;
final BytesRef minTerm;
final BytesRef maxTerm;
final int longsSize;
final BlockTreeTermsReader parent;
final FST<BytesRef> index;
@ -66,7 +65,7 @@ public final class FieldReader extends Terms implements Accountable {
//private boolean DEBUG;
FieldReader(BlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm, boolean openedFromWriter, BlockTreeTermsReader.FSTLoadMode fstLoadMode) throws IOException {
long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm, boolean openedFromWriter, BlockTreeTermsReader.FSTLoadMode fstLoadMode) throws IOException {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
//DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
@ -77,7 +76,6 @@ public final class FieldReader extends Terms implements Accountable {
this.docCount = docCount;
this.indexStartFP = indexStartFP;
this.rootCode = rootCode;
this.longsSize = longsSize;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
// if (DEBUG) {

View File

@ -80,11 +80,8 @@ final class IntersectTermsEnumFrame {
FST.Arc<BytesRef> arc;
final BlockTermState termState;
// metadata buffer, holding monotonic values
final long[] longs;
// metadata buffer, holding general values
// metadata buffer
byte[] bytes = new byte[32];
final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
@ -102,7 +99,6 @@ final class IntersectTermsEnumFrame {
this.ord = ord;
this.termState = ite.fr.parent.postingsReader.newTermState();
this.termState.totalTermFreq = -1;
this.longs = new long[ite.fr.longsSize];
}
void loadNextFloorBlock() throws IOException {
@ -278,11 +274,8 @@ final class IntersectTermsEnumFrame {
} else {
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
}
// metadata
for (int i = 0; i < ite.fr.longsSize; i++) {
longs[i] = bytesReader.readVLong();
}
ite.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ite.fr.fieldInfo, termState, absolute);
// metadata
ite.fr.parent.postingsReader.decodeTerm(bytesReader, ite.fr.fieldInfo, termState, absolute);
metaDataUpto++;
absolute = false;

View File

@ -85,9 +85,7 @@ final class SegmentTermsEnumFrame {
final BlockTermState state;
// metadata buffer, holding monotonic values
final long[] longs;
// metadata buffer, holding general values
// metadata buffer
byte[] bytes = new byte[32];
final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
@ -98,7 +96,6 @@ final class SegmentTermsEnumFrame {
this.ord = ord;
this.state = ste.fr.parent.postingsReader.newTermState();
this.state.totalTermFreq = -1;
this.longs = new long[ste.fr.longsSize];
}
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
@ -424,11 +421,8 @@ final class SegmentTermsEnumFrame {
state.totalTermFreq = state.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}
// metadata
for (int i = 0; i < ste.fr.longsSize; i++) {
longs[i] = bytesReader.readVLong();
}
ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute);
// metadata
ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute);
metaDataUpto++;
absolute = false;

View File

@ -166,7 +166,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
}
@Override
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException {
final IntBlockTermState termState = (IntBlockTermState) _termState;
final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
@ -179,11 +179,11 @@ public final class Lucene84PostingsReader extends PostingsReaderBase {
termState.payStartFP = 0;
}
termState.docStartFP += longs[0];
termState.docStartFP += in.readVLong();
if (fieldHasPositions) {
termState.posStartFP += longs[1];
termState.posStartFP += in.readVLong();
if (fieldHasOffsets || fieldHasPayloads) {
termState.payStartFP += longs[2];
termState.payStartFP += in.readVLong();
}
}
if (termState.docFreq == 1) {

View File

@ -190,20 +190,11 @@ public final class Lucene84PostingsWriter extends PushPostingsWriterBase {
}
@Override
public int setField(FieldInfo fieldInfo) {
public void setField(FieldInfo fieldInfo) {
super.setField(fieldInfo);
skipWriter.setField(writePositions, writeOffsets, writePayloads);
lastState = emptyState;
fieldHasNorms = fieldInfo.hasNorms();
if (writePositions) {
if (writePayloads || writeOffsets) {
return 3; // doc + pos + pay FP
} else {
return 2; // doc + pos FP
}
} else {
return 1; // doc FP
}
}
@Override
@ -466,16 +457,16 @@ public final class Lucene84PostingsWriter extends PushPostingsWriterBase {
}
@Override
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
IntBlockTermState state = (IntBlockTermState)_state;
if (absolute) {
lastState = emptyState;
}
longs[0] = state.docStartFP - lastState.docStartFP;
out.writeVLong(state.docStartFP - lastState.docStartFP);
if (writePositions) {
longs[1] = state.posStartFP - lastState.posStartFP;
out.writeVLong(state.posStartFP - lastState.posStartFP);
if (writePayloads || writeOffsets) {
longs[2] = state.payStartFP - lastState.payStartFP;
out.writeVLong(state.payStartFP - lastState.payStartFP);
}
}
if (state.singletonDocID != -1) {

View File

@ -50,7 +50,7 @@ final class IDVersionPostingsReader extends PostingsReaderBase {
}
@Override
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException {
final IDVersionTermState termState = (IDVersionTermState) _termState;
termState.docID = in.readVInt();

View File

@ -46,7 +46,6 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
private long lastVersion;
private final Bits liveDocs;
private String segment;
public IDVersionPostingsWriter(Bits liveDocs) {
this.liveDocs = liveDocs;
@ -60,11 +59,10 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
@Override
public void init(IndexOutput termsOut, SegmentWriteState state) throws IOException {
CodecUtil.writeIndexHeader(termsOut, TERMS_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
segment = state.segmentInfo.name;
}
@Override
public int setField(FieldInfo fieldInfo) {
public void setField(FieldInfo fieldInfo) {
super.setField(fieldInfo);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
throw new IllegalArgumentException("field must be index using IndexOptions.DOCS_AND_FREQS_AND_POSITIONS");
@ -75,7 +73,6 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
throw new IllegalArgumentException("field cannot index term vectors: CheckIndex will report this as index corruption");
}
lastState = emptyState;
return 0;
}
@Override
@ -154,7 +151,7 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase {
private long lastEncodedVersion;
@Override
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
IDVersionTermState state = (IDVersionTermState) _state;
out.writeVInt(state.docID);
if (absolute) {

View File

@ -83,9 +83,7 @@ final class IDVersionSegmentTermsEnumFrame {
final BlockTermState state;
// metadata buffer, holding monotonic values
public long[] longs;
// metadata buffer, holding general values
// metadata
public byte[] bytes;
ByteArrayDataInput bytesReader;
@ -96,7 +94,6 @@ final class IDVersionSegmentTermsEnumFrame {
this.ord = ord;
this.state = ste.fr.parent.postingsReader.newTermState();
this.state.totalTermFreq = -1;
this.longs = new long[ste.fr.longsSize];
}
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
@ -396,11 +393,8 @@ final class IDVersionSegmentTermsEnumFrame {
state.docFreq = 1;
state.totalTermFreq = 1;
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
// metadata
for (int i = 0; i < ste.fr.longsSize; i++) {
longs[i] = bytesReader.readVLong();
}
ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute);
// metadata
ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute);
metaDataUpto++;
absolute = false;

View File

@ -127,7 +127,6 @@ public final class VersionBlockTreeTermsReader extends FieldsProducer {
final long sumDocFreq = numTerms;
assert numTerms <= Integer.MAX_VALUE;
final int docCount = (int) numTerms;
final int longsSize = in.readVInt();
BytesRef minTerm = readBytesRef(in);
BytesRef maxTerm = readBytesRef(in);
@ -143,7 +142,7 @@ public final class VersionBlockTreeTermsReader extends FieldsProducer {
final long indexStartFP = indexIn.readVLong();
VersionFieldReader previous = fields.put(fieldInfo.name,
new VersionFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
indexStartFP, longsSize, indexIn, minTerm, maxTerm));
indexStartFP, indexIn, minTerm, maxTerm));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
}

View File

@ -143,11 +143,10 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
public final Pair<BytesRef,Long> rootCode;
public final long numTerms;
public final long indexStartFP;
private final int longsSize;
public final BytesRef minTerm;
public final BytesRef maxTerm;
public FieldMetaData(FieldInfo fieldInfo, Pair<BytesRef,Long> rootCode, long numTerms, long indexStartFP, int longsSize,
public FieldMetaData(FieldInfo fieldInfo, Pair<BytesRef,Long> rootCode, long numTerms, long indexStartFP,
BytesRef minTerm, BytesRef maxTerm) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
@ -155,7 +154,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
this.rootCode = rootCode;
this.indexStartFP = indexStartFP;
this.numTerms = numTerms;
this.longsSize = longsSize;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
}
@ -403,7 +401,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
class TermsWriter {
private final FieldInfo fieldInfo;
private final int longsSize;
private long numTerms;
final FixedBitSet docsSeen;
long indexStartFP;
@ -416,8 +413,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
private final BytesRefBuilder lastTerm = new BytesRefBuilder();
private int[] prefixStarts = new int[8];
private final long[] longs;
// Pending stack of terms and blocks. As terms arrive (in sorted order)
// we append to this stack, and once the top of the stack has enough
// terms starting with a common prefix, we write a new block with
@ -605,13 +600,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel;
// Write term meta data
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
assert longs[pos] >= 0;
metaWriter.writeVLong(longs[pos]);
}
bytesWriter.copyTo(metaWriter);
bytesWriter.reset();
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;
}
} else {
@ -648,13 +637,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
// separate anymore:
// Write term meta data
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
assert longs[pos] >= 0;
metaWriter.writeVLong(longs[pos]);
}
bytesWriter.copyTo(metaWriter);
bytesWriter.reset();
postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute);
absolute = false;
} else {
PendingBlock block = (PendingBlock) ent;
@ -720,8 +703,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
this.fieldInfo = fieldInfo;
docsSeen = new FixedBitSet(maxDoc);
this.longsSize = postingsWriter.setField(fieldInfo);
this.longs = new long[longsSize];
postingsWriter.setField(fieldInfo);
}
/** Writes one term's worth of postings. */
@ -818,7 +800,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
((PendingBlock) pending.get(0)).index.getEmptyOutput(),
numTerms,
indexStartFP,
longsSize,
minTerm, maxTerm));
} else {
// cannot assert this: we skip deleted docIDs in the postings:
@ -828,7 +809,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
private final ByteBuffersDataOutput suffixWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance();
}
private boolean closed;
@ -856,7 +836,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
out.writeVInt(field.rootCode.output1.length);
out.writeBytes(field.rootCode.output1.bytes, field.rootCode.output1.offset, field.rootCode.output1.length);
out.writeVLong(field.rootCode.output2);
out.writeVInt(field.longsSize);
indexOut.writeVLong(field.indexStartFP);
writeBytesRef(out, field.minTerm);
writeBytesRef(out, field.maxTerm);

View File

@ -45,14 +45,13 @@ final class VersionFieldReader extends Terms implements Accountable {
final Pair<BytesRef,Long> rootCode;
final BytesRef minTerm;
final BytesRef maxTerm;
final int longsSize;
final VersionBlockTreeTermsReader parent;
final FST<Pair<BytesRef,Long>> index;
//private boolean DEBUG;
VersionFieldReader(VersionBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, Pair<BytesRef,Long> rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
//DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
@ -63,7 +62,6 @@ final class VersionFieldReader extends Terms implements Accountable {
this.docCount = docCount;
this.indexStartFP = indexStartFP;
this.rootCode = rootCode;
this.longsSize = longsSize;
this.minTerm = minTerm;
this.maxTerm = maxTerm;
// if (DEBUG) {

View File

@ -41,8 +41,6 @@ import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsReader;
import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsReader;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsWriter;
import org.apache.lucene.codecs.memory.FSTOrdTermsReader;
import org.apache.lucene.codecs.memory.FSTOrdTermsWriter;
import org.apache.lucene.codecs.memory.FSTTermsReader;
import org.apache.lucene.codecs.memory.FSTTermsWriter;
import org.apache.lucene.index.FieldInfo;
@ -122,7 +120,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
PostingsWriterBase postingsWriter = new Lucene84PostingsWriter(state);
final FieldsConsumer fields;
final int t1 = random.nextInt(5);
final int t1 = random.nextInt(4);
if (t1 == 0) {
boolean success = false;
@ -135,16 +133,6 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
}
}
} else if (t1 == 1) {
boolean success = false;
try {
fields = new FSTOrdTermsWriter(state, postingsWriter);
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
} else if (t1 == 2) {
// Use BlockTree terms dict
if (LuceneTestCase.VERBOSE) {
@ -165,7 +153,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
postingsWriter.close();
}
}
} else if (t1 == 3) {
} else if (t1 == 2) {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing Block terms dict");
@ -235,7 +223,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
}
}
}
} else if (t1 == 4) {
} else if (t1 == 3) {
// Use OrdsBlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing OrdsBlockTree");
@ -287,7 +275,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
PostingsReaderBase postingsReader = new Lucene84PostingsReader(state);
final FieldsProducer fields;
final int t1 = random.nextInt(5);
final int t1 = random.nextInt(4);
if (t1 == 0) {
boolean success = false;
try {
@ -299,16 +287,6 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
}
}
} else if (t1 == 1) {
boolean success = false;
try {
fields = new FSTOrdTermsReader(state, postingsReader);
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
} else if (t1 == 2) {
// Use BlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading BlockTree terms dict");
@ -323,7 +301,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
postingsReader.close();
}
}
} else if (t1 == 3) {
} else if (t1 == 2) {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading Block terms dict");
@ -374,7 +352,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
}
}
}
} else if (t1 == 4) {
} else if (t1 == 3) {
// Use OrdsBlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading OrdsBlockTree terms dict");

View File

@ -45,7 +45,6 @@ import org.apache.lucene.codecs.bloom.TestBloomFilteredLucenePostings;
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.codecs.memory.FSTOrdPostingsFormat;
import org.apache.lucene.codecs.memory.FSTPostingsFormat;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.index.PointValues.IntersectVisitor;
@ -190,7 +189,6 @@ public class RandomCodec extends AssertingCodec {
add(avoidCodecs,
TestUtil.getDefaultPostingsFormat(minItemsPerBlock, maxItemsPerBlock, RandomPicks.randomFrom(random, BlockTreeTermsReader.FSTLoadMode.values())),
new FSTPostingsFormat(),
new FSTOrdPostingsFormat(),
new DirectPostingsFormat(LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : maxItemsPerBlock),
LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : lowFreqCutoff)),
//TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucenePostings to be constructed