LUCENE-3069: merge 'temp' codes back

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1516860 13f79535-47bb-0310-9956-ffa450edef68
Han Jiang 2013-08-23 14:34:47 +00:00
parent 2fc580e715
commit 1621816d81
18 changed files with 698 additions and 642 deletions

BlockTermsReader.java

@ -142,6 +142,7 @@ public class BlockTermsReader extends FieldsProducer {
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
final int longsSize = in.readVInt();
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}
@ -151,7 +152,7 @@ public class BlockTermsReader extends FieldsProducer {
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
}
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount));
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
if (previous != null) {
throw new CorruptIndexException("duplicate fields: " + fieldInfo.name + " (resource=" + in + ")");
}
@ -230,8 +231,9 @@ public class BlockTermsReader extends FieldsProducer {
final long sumTotalTermFreq;
final long sumDocFreq;
final int docCount;
final int longsSize;
FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.numTerms = numTerms;
@ -239,6 +241,7 @@ public class BlockTermsReader extends FieldsProducer {
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
}
@Override
@ -326,6 +329,10 @@ public class BlockTermsReader extends FieldsProducer {
private final ByteArrayDataInput freqReader = new ByteArrayDataInput();
private int metaDataUpto;
private long[] longs;
private byte[] bytes;
private ByteArrayDataInput bytesReader;
public SegmentTermsEnum() throws IOException {
in = BlockTermsReader.this.in.clone();
in.seek(termsStartPointer);
@ -339,6 +346,7 @@ public class BlockTermsReader extends FieldsProducer {
termSuffixes = new byte[128];
docFreqBytes = new byte[64];
//System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader);
longs = new long[longsSize];
}
@Override
@ -415,7 +423,7 @@ public class BlockTermsReader extends FieldsProducer {
assert result;
indexIsCurrent = true;
didIndexNext = false;
didIndexNext = false;
if (doOrd) {
state.ord = indexEnum.ord()-1;
@ -789,12 +797,21 @@ public class BlockTermsReader extends FieldsProducer {
//System.out.println(" freq bytes len=" + len);
in.readBytes(docFreqBytes, 0, len);
freqReader.reset(docFreqBytes, 0, len);
// metadata
len = in.readVInt();
if (bytes == null) {
bytes = new byte[ArrayUtil.oversize(len, 1)];
bytesReader = new ByteArrayDataInput();
} else if (bytes.length < len) {
bytes = new byte[ArrayUtil.oversize(len, 1)];
}
in.readBytes(bytes, 0, len);
bytesReader.reset(bytes, 0, len);
metaDataUpto = 0;
state.termBlockOrd = 0;
postingsReader.readTermsBlock(in, fieldInfo, state);
indexIsCurrent = false;
//System.out.println(" indexIsCurrent=" + indexIsCurrent);
@ -811,9 +828,7 @@ public class BlockTermsReader extends FieldsProducer {
// lazily catch up on metadata decode:
final int limit = state.termBlockOrd;
// We must set/incr state.termCount because
// postings impl can look at this
state.termBlockOrd = metaDataUpto;
boolean absolute = metaDataUpto == 0;
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
//System.out.println(" decode mdUpto=" + metaDataUpto);
@ -825,16 +840,21 @@ public class BlockTermsReader extends FieldsProducer {
// TODO: if docFreq were bulk decoded we could
// just skipN here:
// docFreq, totalTermFreq
state.docFreq = freqReader.readVInt();
//System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
state.totalTermFreq = state.docFreq + freqReader.readVLong();
//System.out.println(" totTF=" + state.totalTermFreq);
}
postingsReader.nextTerm(fieldInfo, state);
// metadata
for (int i = 0; i < longs.length; i++) {
longs[i] = bytesReader.readVLong();
}
postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
metaDataUpto++;
state.termBlockOrd++;
absolute = false;
}
} else {
//System.out.println(" skip! seekPending");
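
The hunks above replace the old per-block readTermsBlock/nextTerm pair: the terms dictionary now reads one length-prefixed metadata blob per block and lazily decodes it, handing the postings reader longsSize VLongs per term (monotonic values such as file pointers) plus the shared bytesReader, with absolute set to true only for the first term of the block. Below is a minimal, self-contained sketch of that absolute-then-delta decode loop. It is not the actual Lucene API: longsSize is fixed at 1, and fixed-width longs stand in for the VLongs the real format uses.

  import java.io.ByteArrayInputStream;
  import java.io.ByteArrayOutputStream;
  import java.io.DataInputStream;
  import java.io.DataOutputStream;
  import java.io.IOException;

  public class BlockMetaSketch {

    // Writer side: the first term of the block is absolute, the rest are deltas.
    static byte[] writeBlock(long[] termFilePointers) throws IOException {
      ByteArrayOutputStream bytes = new ByteArrayOutputStream();
      DataOutputStream meta = new DataOutputStream(bytes);
      long last = 0;
      boolean absolute = true;
      for (long fp : termFilePointers) {
        meta.writeLong(absolute ? fp : fp - last);   // real format: writeVLong
        last = fp;
        absolute = false;
      }
      return bytes.toByteArray();
    }

    // Reader side: lazily catch up from term 0 to targetOrd, mirroring the
    // metaDataUpto loop above; absolute is true only for the first term.
    static long decodeUpTo(byte[] blockMeta, int targetOrd) throws IOException {
      DataInputStream meta = new DataInputStream(new ByteArrayInputStream(blockMeta));
      long fp = 0;
      boolean absolute = true;
      for (int metaDataUpto = 0; metaDataUpto <= targetOrd; metaDataUpto++) {
        long v = meta.readLong();                    // real format: readVLong
        fp = absolute ? v : fp + v;
        absolute = false;
      }
      return fp;
    }

    public static void main(String[] args) throws IOException {
      byte[] block = writeBlock(new long[] {100, 140, 220});
      System.out.println(decodeUpTo(block, 2));      // prints 220
    }
  }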

BlockTermsWriter.java

@ -27,6 +27,7 @@ import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PostingsConsumer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
@ -77,8 +78,9 @@ public class BlockTermsWriter extends FieldsConsumer {
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
public final int longsSize;
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) {
public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
this.termsStartPointer = termsStartPointer;
@ -86,6 +88,7 @@ public class BlockTermsWriter extends FieldsConsumer {
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
}
}
@ -109,7 +112,7 @@ public class BlockTermsWriter extends FieldsConsumer {
//System.out.println("BTW.init seg=" + state.segmentName);
postingsWriter.start(out); // have consumer write its format/header
postingsWriter.init(out); // have consumer write its format/header
success = true;
} finally {
if (!success) {
@ -148,6 +151,7 @@ public class BlockTermsWriter extends FieldsConsumer {
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
out.writeVInt(field.longsSize);
}
writeTrailer(dirStart);
} finally {
@ -161,7 +165,7 @@ public class BlockTermsWriter extends FieldsConsumer {
private static class TermEntry {
public final BytesRef term = new BytesRef();
public TermStats stats;
public BlockTermState state;
}
class TermsWriter extends TermsConsumer {
@ -173,6 +177,7 @@ public class BlockTermsWriter extends FieldsConsumer {
long sumTotalTermFreq;
long sumDocFreq;
int docCount;
int longsSize;
private TermEntry[] pendingTerms;
@ -190,8 +195,8 @@ public class BlockTermsWriter extends FieldsConsumer {
pendingTerms[i] = new TermEntry();
}
termsStartPointer = out.getFilePointer();
postingsWriter.setField(fieldInfo);
this.postingsWriter = postingsWriter;
this.longsSize = postingsWriter.setField(fieldInfo);
}
@Override
@ -237,11 +242,12 @@ public class BlockTermsWriter extends FieldsConsumer {
}
final TermEntry te = pendingTerms[pendingCount];
te.term.copyBytes(text);
te.stats = stats;
te.state = postingsWriter.newTermState();
te.state.docFreq = stats.docFreq;
te.state.totalTermFreq = stats.totalTermFreq;
postingsWriter.finishTerm(te.state);
pendingCount++;
postingsWriter.finishTerm(stats);
numTerms++;
}
@ -264,7 +270,8 @@ public class BlockTermsWriter extends FieldsConsumer {
termsStartPointer,
sumTotalTermFreq,
sumDocFreq,
docCount));
docCount,
longsSize));
}
}
@ -285,6 +292,7 @@ public class BlockTermsWriter extends FieldsConsumer {
}
private final RAMOutputStream bytesWriter = new RAMOutputStream();
private final RAMOutputStream bufferWriter = new RAMOutputStream();
private void flushBlock() throws IOException {
//System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
@ -318,19 +326,34 @@ public class BlockTermsWriter extends FieldsConsumer {
// TODO: cutover to better intblock codec. simple64?
// write prefix, suffix first:
for(int termCount=0;termCount<pendingCount;termCount++) {
final TermStats stats = pendingTerms[termCount].stats;
assert stats != null;
bytesWriter.writeVInt(stats.docFreq);
final BlockTermState state = pendingTerms[termCount].state;
assert state != null;
bytesWriter.writeVInt(state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
bytesWriter.writeVLong(state.totalTermFreq-state.docFreq);
}
}
out.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(out);
bytesWriter.reset();
postingsWriter.flushTermsBlock(pendingCount, pendingCount);
// 4th pass: write the metadata
long[] longs = new long[longsSize];
boolean absolute = true;
for(int termCount=0;termCount<pendingCount;termCount++) {
final BlockTermState state = pendingTerms[termCount].state;
postingsWriter.encodeTerm(longs, bufferWriter, fieldInfo, state, absolute);
for (int i = 0; i < longsSize; i++) {
bytesWriter.writeVLong(longs[i]);
}
bufferWriter.writeTo(bytesWriter);
bufferWriter.reset();
absolute = false;
}
out.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(out);
bytesWriter.reset();
lastPrevTerm.copyBytes(pendingTerms[pendingCount-1].term);
pendingCount = 0;
}
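
On the writer side, the new 4th pass asks the postings writer to encodeTerm into two channels per term: a long[] of length longsSize for values the terms dictionary may delta-code (typically file pointers), and an opaque DataOutput (bufferWriter above) whose bytes are appended right after the longs. A hedged sketch of what a postings writer's encodeTerm could look like under that contract; SimplePostingsWriter, SimpleTermState and their fields are hypothetical, longsSize is assumed to be 1, and fixed-width writes stand in for VInt/VLong.

  import java.io.DataOutputStream;
  import java.io.IOException;

  // Hypothetical per-term state; in Lucene this lives on a BlockTermState subclass.
  class SimpleTermState {
    long docStartFP;      // monotonic: where this term's postings start
    int singletonDocID;   // -1 unless the term occurs in exactly one doc
  }

  class SimplePostingsWriter {
    private long lastDocStartFP;

    // longs[] carries monotonic values (delta-coded against the previous term,
    // restarting when absolute); the DataOutput carries any extra metadata bytes.
    void encodeTerm(long[] longs, DataOutputStream out, SimpleTermState state,
                    boolean absolute) throws IOException {
      if (absolute) {
        lastDocStartFP = 0;                 // first term in the block: no delta base yet
      }
      longs[0] = state.docStartFP - lastDocStartFP;
      lastDocStartFP = state.docStartFP;
      out.writeInt(state.singletonDocID);   // real writers use VInt and flag bits
    }
  }

The terms dictionary then writes longs[0] as a VLong followed by the buffered bytes for each term, just as the loop above does with bytesWriter and bufferWriter.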

PulsingPostingsFormat.java

@ -79,7 +79,7 @@ public abstract class PulsingPostingsFormat extends PostingsFormat {
// Terms that have <= freqCutoff number of docs are
// "pulsed" (inlined):
pulsingWriter = new PulsingPostingsWriter(freqCutoff, docsWriter);
pulsingWriter = new PulsingPostingsWriter(state, freqCutoff, docsWriter);
FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter, minBlockSize, maxBlockSize);
success = true;
return ret;
@ -98,7 +98,7 @@ public abstract class PulsingPostingsFormat extends PostingsFormat {
boolean success = false;
try {
docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
pulsingReader = new PulsingPostingsReader(docsReader);
pulsingReader = new PulsingPostingsReader(state, docsReader);
FieldsProducer ret = new BlockTreeTermsReader(
state.directory, state.fieldInfos, state.segmentInfo,
pulsingReader,

PulsingPostingsReader.java

@ -20,16 +20,20 @@ package org.apache.lucene.codecs.pulsing;
import java.io.IOException;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Attribute;
@ -37,6 +41,7 @@ import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/** Concrete class that reads the current doc/freq/skip
* postings format
@ -50,28 +55,53 @@ public class PulsingPostingsReader extends PostingsReaderBase {
// Fallback reader for non-pulsed terms:
final PostingsReaderBase wrappedPostingsReader;
final SegmentReadState segmentState;
int maxPositions;
int version;
TreeMap<Integer, Integer> fields;
public PulsingPostingsReader(PostingsReaderBase wrappedPostingsReader) {
public PulsingPostingsReader(SegmentReadState state, PostingsReaderBase wrappedPostingsReader) {
this.wrappedPostingsReader = wrappedPostingsReader;
this.segmentState = state;
}
@Override
public void init(IndexInput termsIn) throws IOException {
CodecUtil.checkHeader(termsIn, PulsingPostingsWriter.CODEC,
PulsingPostingsWriter.VERSION_START, PulsingPostingsWriter.VERSION_START);
version = CodecUtil.checkHeader(termsIn, PulsingPostingsWriter.CODEC,
PulsingPostingsWriter.VERSION_START,
PulsingPostingsWriter.VERSION_CURRENT);
maxPositions = termsIn.readVInt();
wrappedPostingsReader.init(termsIn);
if (wrappedPostingsReader instanceof PulsingPostingsReader ||
version < PulsingPostingsWriter.VERSION_META_ARRAY) {
fields = null;
} else {
fields = new TreeMap<Integer, Integer>();
String summaryFileName = IndexFileNames.segmentFileName(segmentState.segmentInfo.name, segmentState.segmentSuffix, PulsingPostingsWriter.SUMMARY_EXTENSION);
IndexInput in = null;
try {
in = segmentState.directory.openInput(summaryFileName, segmentState.context);
CodecUtil.checkHeader(in, PulsingPostingsWriter.CODEC, version,
PulsingPostingsWriter.VERSION_CURRENT);
int numField = in.readVInt();
for (int i = 0; i < numField; i++) {
int fieldNum = in.readVInt();
int longsSize = in.readVInt();
fields.put(fieldNum, longsSize);
}
} finally {
IOUtils.closeWhileHandlingException(in);
}
}
}
private static class PulsingTermState extends BlockTermState {
private boolean absolute = false;
private long[] longs;
private byte[] postings;
private int postingsSize; // -1 if this term was not inlined
private BlockTermState wrappedTermState;
ByteArrayDataInput inlinedBytesReader;
private byte[] inlinedBytes;
@Override
public PulsingTermState clone() {
PulsingTermState clone;
@ -82,6 +112,11 @@ public class PulsingPostingsReader extends PostingsReaderBase {
} else {
assert wrappedTermState != null;
clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
clone.absolute = absolute;
if (longs != null) {
clone.longs = new long[longs.length];
System.arraycopy(longs, 0, clone.longs, 0, longs.length);
}
}
return clone;
}
@ -99,11 +134,6 @@ public class PulsingPostingsReader extends PostingsReaderBase {
} else {
wrappedTermState.copyFrom(other.wrappedTermState);
}
// NOTE: we do not copy the
// inlinedBytes/inlinedBytesReader; these are only
// stored on the "primary" TermState. They are
// "transient" to cloned term states.
}
@Override
@ -116,25 +146,6 @@ public class PulsingPostingsReader extends PostingsReaderBase {
}
}
@Override
public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
//System.out.println("PR.readTermsBlock state=" + _termState);
final PulsingTermState termState = (PulsingTermState) _termState;
if (termState.inlinedBytes == null) {
termState.inlinedBytes = new byte[128];
termState.inlinedBytesReader = new ByteArrayDataInput();
}
int len = termsIn.readVInt();
//System.out.println(" len=" + len + " fp=" + termsIn.getFilePointer());
if (termState.inlinedBytes.length < len) {
termState.inlinedBytes = new byte[ArrayUtil.oversize(len, 1)];
}
termsIn.readBytes(termState.inlinedBytes, 0, len);
termState.inlinedBytesReader.reset(termState.inlinedBytes);
termState.wrappedTermState.termBlockOrd = 0;
wrappedPostingsReader.readTermsBlock(termsIn, fieldInfo, termState.wrappedTermState);
}
@Override
public BlockTermState newTermState() throws IOException {
PulsingTermState state = new PulsingTermState();
@ -143,20 +154,20 @@ public class PulsingPostingsReader extends PostingsReaderBase {
}
@Override
public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
public void decodeTerm(long[] empty, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) throws IOException {
//System.out.println("PR nextTerm");
PulsingTermState termState = (PulsingTermState) _termState;
assert empty.length == 0;
termState.absolute = termState.absolute || absolute;
// if we have positions, its total TF, otherwise its computed based on docFreq.
long count = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 ? termState.totalTermFreq : termState.docFreq;
//System.out.println(" count=" + count + " threshold=" + maxPositions);
if (count <= maxPositions) {
// Inlined into terms dict -- just read the byte[] blob in,
// but don't decode it now (we only decode when a DocsEnum
// or D&PEnum is pulled):
termState.postingsSize = termState.inlinedBytesReader.readVInt();
termState.postingsSize = in.readVInt();
if (termState.postings == null || termState.postings.length < termState.postingsSize) {
termState.postings = new byte[ArrayUtil.oversize(termState.postingsSize, 1)];
}
@ -164,16 +175,23 @@ public class PulsingPostingsReader extends PostingsReaderBase {
// (the blob holding all inlined terms' blobs for
// current term block) into another byte[] (just the
// blob for this term)...
termState.inlinedBytesReader.readBytes(termState.postings, 0, termState.postingsSize);
in.readBytes(termState.postings, 0, termState.postingsSize);
//System.out.println(" inlined bytes=" + termState.postingsSize);
termState.absolute = termState.absolute || absolute;
} else {
//System.out.println(" not inlined");
final int longsSize = fields == null ? 0 : fields.get(fieldInfo.number);
if (termState.longs == null) {
termState.longs = new long[longsSize];
}
for (int i = 0; i < longsSize; i++) {
termState.longs[i] = in.readVLong();
}
termState.postingsSize = -1;
// TODO: should we do full copyFrom? much heavier...?
termState.wrappedTermState.docFreq = termState.docFreq;
termState.wrappedTermState.totalTermFreq = termState.totalTermFreq;
wrappedPostingsReader.nextTerm(fieldInfo, termState.wrappedTermState);
termState.wrappedTermState.termBlockOrd++;
wrappedPostingsReader.decodeTerm(termState.longs, in, fieldInfo, termState.wrappedTermState, termState.absolute);
termState.absolute = false;
}
}
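
One subtlety in the new pulsing decodeTerm above: inlined terms consume no metadata from the wrapped reader, so the wrapped reader's notion of "previous term" does not advance, and the absolute flag has to keep accumulating (termState.absolute = termState.absolute || absolute) until a non-inlined term finally delegates, at which point it is cleared. A toy sketch of just that control flow; WrappedDecoder and PulsingState are hypothetical stand-ins for the wrapped PostingsReaderBase and BlockTermState, and fixed-width reads stand in for VInt/VLong.

  import java.io.DataInput;
  import java.io.IOException;

  interface WrappedDecoder {
    // Decodes one term's metadata for the wrapped (non-inlined) format.
    void decodeTerm(long[] longs, DataInput in, boolean absolute) throws IOException;
  }

  class PulsingState {
    boolean absolute;     // accumulated "needs absolute decode" flag
    byte[] postings;      // inlined postings blob, or null
    long[] longs;         // metadata longs for the wrapped decoder
  }

  class PulsingDecodeSketch {
    static void decodeTerm(long[] empty, DataInput in, PulsingState state, boolean absolute,
                           long totalOccurrences, int maxPositions, int longsSize,
                           WrappedDecoder wrapped) throws IOException {
      assert empty.length == 0;              // the pulsing layer itself declares zero longs
      state.absolute = state.absolute || absolute;
      if (totalOccurrences <= maxPositions) {
        // Inlined: the postings blob sits directly in the term metadata.
        int len = in.readInt();              // real format: readVInt
        state.postings = new byte[len];
        in.readFully(state.postings);
        // NOTE: state.absolute is deliberately NOT cleared here; inlined terms
        // consume nothing from the wrapped reader.
      } else {
        // Not inlined: read the wrapped longs, then delegate with the accumulated flag.
        if (state.longs == null) {
          state.longs = new long[longsSize];
        }
        for (int i = 0; i < longsSize; i++) {
          state.longs[i] = in.readLong();    // real format: readVLong
        }
        state.postings = null;
        wrapped.decodeTerm(state.longs, in, state.absolute);
        state.absolute = false;              // cleared only after delegating
      }
    }
  }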

PulsingPostingsWriter.java

@ -21,14 +21,19 @@ import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
// TODO: we now inline based on total TF of the term,
// but it might be better to inline by "net bytes used"
@ -49,26 +54,43 @@ public final class PulsingPostingsWriter extends PostingsWriterBase {
final static String CODEC = "PulsedPostingsWriter";
// recording field summary
final static String SUMMARY_EXTENSION = "smy";
// To add a new version, increment from the last one, and
// change VERSION_CURRENT to point to your new version:
final static int VERSION_START = 0;
final static int VERSION_CURRENT = VERSION_START;
final static int VERSION_META_ARRAY = 0;
final static int VERSION_CURRENT = VERSION_META_ARRAY;
private SegmentWriteState segmentState;
private IndexOutput termsOut;
private List<FieldMetaData> fields;
private IndexOptions indexOptions;
private boolean storePayloads;
private static class PendingTerm {
private final byte[] bytes;
public PendingTerm(byte[] bytes) {
this.bytes = bytes;
// information for wrapped PF, in current field
private int longsSize;
private long[] longs;
boolean absolute;
private static class PulsingTermState extends BlockTermState {
private byte[] bytes;
private BlockTermState wrappedState;
@Override
public String toString() {
if (bytes != null) {
return "inlined";
} else {
return "not inlined wrapped=" + wrappedState;
}
}
}
private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
// one entry per position
private final Position[] pending;
private int pendingCount = 0; // -1 once we've hit too many positions
@ -83,6 +105,15 @@ public final class PulsingPostingsWriter extends PostingsWriterBase {
int endOffset;
}
private static final class FieldMetaData {
int fieldNumber;
int longsSize;
FieldMetaData(int number, int size) {
fieldNumber = number;
longsSize = size;
}
}
// TODO: -- lazy init this? ie, if every single term
// was inlined (eg for a "primary key" field) then we
// never need to use this fallback? Fallback writer for
@ -92,23 +123,33 @@ public final class PulsingPostingsWriter extends PostingsWriterBase {
/** If the total number of positions (summed across all docs
* for this term) is <= maxPositions, then the postings are
* inlined into terms dict */
public PulsingPostingsWriter(int maxPositions, PostingsWriterBase wrappedPostingsWriter) {
public PulsingPostingsWriter(SegmentWriteState state, int maxPositions, PostingsWriterBase wrappedPostingsWriter) {
pending = new Position[maxPositions];
for(int i=0;i<maxPositions;i++) {
pending[i] = new Position();
}
fields = new ArrayList<FieldMetaData>();
// We simply wrap another postings writer, but only call
// on it when tot positions is >= the cutoff:
this.wrappedPostingsWriter = wrappedPostingsWriter;
this.segmentState = state;
}
@Override
public void start(IndexOutput termsOut) throws IOException {
public void init(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
termsOut.writeVInt(pending.length); // encode maxPositions in header
wrappedPostingsWriter.start(termsOut);
wrappedPostingsWriter.init(termsOut);
}
@Override
public BlockTermState newTermState() throws IOException {
PulsingTermState state = new PulsingTermState();
state.wrappedState = wrappedPostingsWriter.newTermState();
return state;
}
@Override
@ -123,11 +164,15 @@ public final class PulsingPostingsWriter extends PostingsWriterBase {
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
public void setField(FieldInfo fieldInfo) {
public int setField(FieldInfo fieldInfo) {
this.indexOptions = fieldInfo.getIndexOptions();
//if (DEBUG) System.out.println("PW field=" + fieldInfo.name + " indexOptions=" + indexOptions);
storePayloads = fieldInfo.hasPayloads();
wrappedPostingsWriter.setField(fieldInfo);
absolute = false;
longsSize = wrappedPostingsWriter.setField(fieldInfo);
longs = new long[longsSize];
fields.add(new FieldMetaData(fieldInfo.number, longsSize));
return 0;
//DEBUG = BlockTreeTermsWriter.DEBUG;
}
@ -219,18 +264,19 @@ public final class PulsingPostingsWriter extends PostingsWriterBase {
/** Called when we are done adding docs to this term */
@Override
public void finishTerm(TermStats stats) throws IOException {
public void finishTerm(BlockTermState _state) throws IOException {
PulsingTermState state = (PulsingTermState) _state;
// if (DEBUG) System.out.println("PW finishTerm docCount=" + stats.docFreq + " pendingCount=" + pendingCount + " pendingTerms.size()=" + pendingTerms.size());
assert pendingCount > 0 || pendingCount == -1;
if (pendingCount == -1) {
wrappedPostingsWriter.finishTerm(stats);
// Must add null entry to record terms that our
// wrapped postings impl added
pendingTerms.add(null);
state.wrappedState.docFreq = state.docFreq;
state.wrappedState.totalTermFreq = state.totalTermFreq;
state.bytes = null;
wrappedPostingsWriter.finishTerm(state.wrappedState);
} else {
// There were few enough total occurrences for this
// term, so we fully inline our postings data into
// terms dict, now:
@ -325,61 +371,54 @@ public final class PulsingPostingsWriter extends PostingsWriterBase {
}
}
final byte[] bytes = new byte[(int) buffer.getFilePointer()];
buffer.writeTo(bytes, 0);
pendingTerms.add(new PendingTerm(bytes));
state.bytes = new byte[(int) buffer.getFilePointer()];
buffer.writeTo(state.bytes, 0);
buffer.reset();
}
pendingCount = 0;
}
@Override
public void encodeTerm(long[] empty, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
PulsingTermState state = (PulsingTermState)_state;
assert empty.length == 0;
this.absolute = this.absolute || absolute;
if (state.bytes == null) {
wrappedPostingsWriter.encodeTerm(longs, buffer, fieldInfo, state.wrappedState, this.absolute);
for (int i = 0; i < longsSize; i++) {
out.writeVLong(longs[i]);
}
buffer.writeTo(out);
buffer.reset();
this.absolute = false;
} else {
out.writeVInt(state.bytes.length);
out.writeBytes(state.bytes, 0, state.bytes.length);
this.absolute = this.absolute || absolute;
}
}
@Override
public void close() throws IOException {
wrappedPostingsWriter.close();
}
@Override
public void flushTermsBlock(int start, int count) throws IOException {
// if (DEBUG) System.out.println("PW: flushTermsBlock start=" + start + " count=" + count + " pendingTerms.size()=" + pendingTerms.size());
int wrappedCount = 0;
assert buffer.getFilePointer() == 0;
assert start >= count;
final int limit = pendingTerms.size() - start + count;
for(int idx=pendingTerms.size()-start; idx<limit; idx++) {
final PendingTerm term = pendingTerms.get(idx);
if (term == null) {
wrappedCount++;
} else {
buffer.writeVInt(term.bytes.length);
buffer.writeBytes(term.bytes, 0, term.bytes.length);
}
if (wrappedPostingsWriter instanceof PulsingPostingsWriter ||
VERSION_CURRENT < VERSION_META_ARRAY) {
return;
}
termsOut.writeVInt((int) buffer.getFilePointer());
buffer.writeTo(termsOut);
buffer.reset();
// TDOO: this could be somewhat costly since
// pendingTerms.size() could be biggish?
int futureWrappedCount = 0;
final int limit2 = pendingTerms.size();
for(int idx=limit;idx<limit2;idx++) {
if (pendingTerms.get(idx) == null) {
futureWrappedCount++;
String summaryFileName = IndexFileNames.segmentFileName(segmentState.segmentInfo.name, segmentState.segmentSuffix, SUMMARY_EXTENSION);
IndexOutput out = null;
try {
out = segmentState.directory.createOutput(summaryFileName, segmentState.context);
CodecUtil.writeHeader(out, CODEC, VERSION_CURRENT);
out.writeVInt(fields.size());
for (FieldMetaData field : fields) {
out.writeVInt(field.fieldNumber);
out.writeVInt(field.longsSize);
}
out.close();
} finally {
IOUtils.closeWhileHandlingException(out);
}
// Remove the terms we just wrote:
pendingTerms.subList(pendingTerms.size()-start, limit).clear();
// if (DEBUG) System.out.println("PW: len=" + buffer.getFilePointer() + " fp=" + termsOut.getFilePointer() + " futureWrappedCount=" + futureWrappedCount + " wrappedCount=" + wrappedCount);
// TODO: can we avoid calling this if all terms
// were inlined...? Eg for a "primary key" field, the
// wrapped codec is never invoked...
wrappedPostingsWriter.flushTermsBlock(futureWrappedCount+wrappedCount, wrappedCount);
}
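
Because the pulsing writer now wraps another postings writer whose per-term metadata width (longsSize) varies by field, the commit records that width in a small field-summary file with extension .smy, written when the writer is closed: a codec header, the number of fields, then (fieldNumber, longsSize) pairs, which PulsingPostingsReader.init loads back into a TreeMap. A self-contained sketch of that round trip under simplified assumptions: writeUTF/writeInt stand in for the CodecUtil header and VInts of the real file.

  import java.io.ByteArrayInputStream;
  import java.io.ByteArrayOutputStream;
  import java.io.DataInputStream;
  import java.io.DataOutputStream;
  import java.io.IOException;
  import java.util.Map;
  import java.util.TreeMap;

  public class FieldSummarySketch {

    static byte[] writeSummary(Map<Integer, Integer> longsPerField) throws IOException {
      ByteArrayOutputStream bytes = new ByteArrayOutputStream();
      DataOutputStream out = new DataOutputStream(bytes);
      out.writeUTF("PulsedPostingsWriter");             // stands in for the codec header
      out.writeInt(longsPerField.size());
      for (Map.Entry<Integer, Integer> e : longsPerField.entrySet()) {
        out.writeInt(e.getKey());                       // field number
        out.writeInt(e.getValue());                     // wrapped writer's longsSize
      }
      return bytes.toByteArray();
    }

    static TreeMap<Integer, Integer> readSummary(byte[] data) throws IOException {
      DataInputStream in = new DataInputStream(new ByteArrayInputStream(data));
      if (!"PulsedPostingsWriter".equals(in.readUTF())) {
        throw new IOException("not a field summary");
      }
      TreeMap<Integer, Integer> fields = new TreeMap<Integer, Integer>();
      int numFields = in.readInt();
      for (int i = 0; i < numFields; i++) {
        fields.put(in.readInt(), in.readInt());
      }
      return fields;
    }

    public static void main(String[] args) throws IOException {
      TreeMap<Integer, Integer> fields = new TreeMap<Integer, Integer>();
      fields.put(0, 3);   // e.g. field 0 wraps a writer that uses 3 longs per term
      fields.put(2, 1);
      System.out.println(readSummary(writeSummary(fields)));   // {0=3, 2=1}
    }
  }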
// Pushes pending positions to the wrapped codec

SepPostingsReader.java

@ -31,6 +31,7 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -115,15 +116,6 @@ public class SepPostingsReader extends PostingsReaderBase {
long payloadFP;
long skipFP;
// Only used for "primary" term state; these are never
// copied on clone:
// TODO: these should somehow be stored per-TermsEnum
// not per TermState; maybe somehow the terms dict
// should load/manage the byte[]/DataReader for us?
byte[] bytes;
ByteArrayDataInput bytesReader;
@Override
public SepTermState clone() {
SepTermState other = new SepTermState();
@ -182,40 +174,21 @@ public class SepPostingsReader extends PostingsReaderBase {
}
@Override
public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
public void decodeTerm(long[] empty, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException {
final SepTermState termState = (SepTermState) _termState;
//System.out.println("SEPR: readTermsBlock termsIn.fp=" + termsIn.getFilePointer());
final int len = termsIn.readVInt();
//System.out.println(" numBytes=" + len);
if (termState.bytes == null) {
termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
termState.bytesReader = new ByteArrayDataInput(termState.bytes);
} else if (termState.bytes.length < len) {
termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
}
termState.bytesReader.reset(termState.bytes, 0, len);
termsIn.readBytes(termState.bytes, 0, len);
}
@Override
public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
final SepTermState termState = (SepTermState) _termState;
final boolean isFirstTerm = termState.termBlockOrd == 0;
//System.out.println("SEPR.nextTerm termCount=" + termState.termBlockOrd + " isFirstTerm=" + isFirstTerm + " bytesReader.pos=" + termState.bytesReader.getPosition());
//System.out.println(" docFreq=" + termState.docFreq);
termState.docIndex.read(termState.bytesReader, isFirstTerm);
//System.out.println(" docIndex=" + termState.docIndex);
termState.docIndex.read(in, absolute);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
termState.freqIndex.read(termState.bytesReader, isFirstTerm);
termState.freqIndex.read(in, absolute);
if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
//System.out.println(" freqIndex=" + termState.freqIndex);
termState.posIndex.read(termState.bytesReader, isFirstTerm);
termState.posIndex.read(in, absolute);
//System.out.println(" posIndex=" + termState.posIndex);
if (fieldInfo.hasPayloads()) {
if (isFirstTerm) {
termState.payloadFP = termState.bytesReader.readVLong();
if (absolute) {
termState.payloadFP = in.readVLong();
} else {
termState.payloadFP += termState.bytesReader.readVLong();
termState.payloadFP += in.readVLong();
}
//System.out.println(" payloadFP=" + termState.payloadFP);
}
@ -223,14 +196,14 @@ public class SepPostingsReader extends PostingsReaderBase {
}
if (termState.docFreq >= skipMinimum) {
//System.out.println(" readSkip @ " + termState.bytesReader.getPosition());
if (isFirstTerm) {
termState.skipFP = termState.bytesReader.readVLong();
//System.out.println(" readSkip @ " + in.getPosition());
if (absolute) {
termState.skipFP = in.readVLong();
} else {
termState.skipFP += termState.bytesReader.readVLong();
termState.skipFP += in.readVLong();
}
//System.out.println(" skipFP=" + termState.skipFP);
} else if (isFirstTerm) {
} else if (absolute) {
termState.skipFP = 0;
}
}

SepPostingsWriter.java

@ -18,18 +18,17 @@ package org.apache.lucene.codecs.sep;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
@ -64,7 +63,6 @@ public final class SepPostingsWriter extends PostingsWriterBase {
IndexOutput payloadOut;
IndexOutput skipOut;
IndexOutput termsOut;
final SepSkipListWriter skipListWriter;
/** Expert: The fraction of TermDocs entries stored in skip tables,
@ -87,8 +85,6 @@ public final class SepPostingsWriter extends PostingsWriterBase {
final int totalNumDocs;
PendingTerm lastState;
boolean storePayloads;
IndexOptions indexOptions;
@ -100,8 +96,9 @@ public final class SepPostingsWriter extends PostingsWriterBase {
int lastDocID;
int df;
// Holds pending byte[] blob for the current terms block
private final RAMOutputStream indexBytesWriter = new RAMOutputStream();
SepTermState lastState;
long lastPayloadFP;
long lastSkipFP;
public SepPostingsWriter(SegmentWriteState state, IntStreamFactory factory) throws IOException {
this(state, factory, DEFAULT_SKIP_INTERVAL);
@ -121,6 +118,7 @@ public final class SepPostingsWriter extends PostingsWriterBase {
docOut = factory.createOutput(state.directory, docFileName, state.context);
docIndex = docOut.index();
if (state.fieldInfos.hasFreq()) {
final String frqFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FREQ_EXTENSION);
freqOut = factory.createOutput(state.directory, frqFileName, state.context);
@ -157,8 +155,7 @@ public final class SepPostingsWriter extends PostingsWriterBase {
}
@Override
public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
public void init(IndexOutput termsOut) throws IOException {
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
// TODO: -- just ask skipper to "start" here
termsOut.writeInt(skipInterval); // write skipInterval
@ -166,6 +163,11 @@ public final class SepPostingsWriter extends PostingsWriterBase {
termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
public SepTermState newTermState() {
return new SepTermState();
}
@Override
public void startTerm() throws IOException {
docIndex.mark();
@ -187,7 +189,7 @@ public final class SepPostingsWriter extends PostingsWriterBase {
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
public void setField(FieldInfo fieldInfo) {
public int setField(FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
this.indexOptions = fieldInfo.getIndexOptions();
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
@ -195,6 +197,24 @@ public final class SepPostingsWriter extends PostingsWriterBase {
}
skipListWriter.setIndexOptions(indexOptions);
storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.hasPayloads();
lastPayloadFP = 0;
lastSkipFP = 0;
lastState = setEmptyState();
return 0;
}
private SepTermState setEmptyState() {
SepTermState emptyState = new SepTermState();
emptyState.docIndex = docOut.index();
if (indexOptions != IndexOptions.DOCS_ONLY) {
emptyState.freqIndex = freqOut.index();
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
emptyState.posIndex = posOut.index();
}
}
emptyState.payloadFP = 0;
emptyState.skipFP = 0;
return emptyState;
}
/** Adds a new doc in this term. If this returns null
@ -262,135 +282,86 @@ public final class SepPostingsWriter extends PostingsWriterBase {
lastPosition = 0;
}
private static class PendingTerm {
public final IntIndexOutput.Index docIndex;
public final IntIndexOutput.Index freqIndex;
public final IntIndexOutput.Index posIndex;
private static class SepTermState extends BlockTermState {
public IntIndexOutput.Index docIndex;
public IntIndexOutput.Index freqIndex;
public IntIndexOutput.Index posIndex;
public long payloadFP;
public long skipFP;
public PendingTerm(IntIndexOutput.Index docIndex, IntIndexOutput.Index freqIndex, IntIndexOutput.Index posIndex, long payloadFP, long skipFP) {
this.docIndex = docIndex;
this.freqIndex = freqIndex;
this.posIndex = posIndex;
this.payloadFP = payloadFP;
this.skipFP = skipFP;
}
}
private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
/** Called when we are done adding docs to this term */
@Override
public void finishTerm(TermStats stats) throws IOException {
public void finishTerm(BlockTermState _state) throws IOException {
SepTermState state = (SepTermState)_state;
// TODO: -- wasteful we are counting this in two places?
assert stats.docFreq > 0;
assert stats.docFreq == df;
assert state.docFreq > 0;
assert state.docFreq == df;
final IntIndexOutput.Index docIndexCopy = docOut.index();
docIndexCopy.copyFrom(docIndex, false);
final IntIndexOutput.Index freqIndexCopy;
final IntIndexOutput.Index posIndexCopy;
state.docIndex = docOut.index();
state.docIndex.copyFrom(docIndex, false);
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndexCopy = freqOut.index();
freqIndexCopy.copyFrom(freqIndex, false);
state.freqIndex = freqOut.index();
state.freqIndex.copyFrom(freqIndex, false);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndexCopy = posOut.index();
posIndexCopy.copyFrom(posIndex, false);
state.posIndex = posOut.index();
state.posIndex.copyFrom(posIndex, false);
} else {
posIndexCopy = null;
state.posIndex = null;
}
} else {
freqIndexCopy = null;
posIndexCopy = null;
state.freqIndex = null;
state.posIndex = null;
}
final long skipFP;
if (df >= skipMinimum) {
skipFP = skipOut.getFilePointer();
state.skipFP = skipOut.getFilePointer();
//System.out.println(" skipFP=" + skipFP);
skipListWriter.writeSkip(skipOut);
//System.out.println(" numBytes=" + (skipOut.getFilePointer()-skipFP));
} else {
skipFP = -1;
state.skipFP = -1;
}
state.payloadFP = payloadStart;
lastDocID = 0;
df = 0;
pendingTerms.add(new PendingTerm(docIndexCopy,
freqIndexCopy,
posIndexCopy,
payloadStart,
skipFP));
}
@Override
public void flushTermsBlock(int start, int count) throws IOException {
//System.out.println("SEPW: flushTermsBlock: start=" + start + " count=" + count + " pendingTerms.size()=" + pendingTerms.size() + " termsOut.fp=" + termsOut.getFilePointer());
assert indexBytesWriter.getFilePointer() == 0;
final int absStart = pendingTerms.size() - start;
final List<PendingTerm> slice = pendingTerms.subList(absStart, absStart+count);
if (count == 0) {
termsOut.writeByte((byte) 0);
return;
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
SepTermState state = (SepTermState)_state;
if (absolute) {
lastSkipFP = 0;
lastPayloadFP = 0;
lastState = state;
}
long lastSkipFP = 0;
long lastPayloadFP = 0;
boolean isFirstTerm = true;
for(int idx=0;idx<slice.size();idx++) {
if (isFirstTerm) {
lastState = slice.get(idx);
}
final PendingTerm t = slice.get(idx);
//System.out.println(" last(pure): doc="+lastState.docIndex +" frq=" + lastState.freqIndex+" pos="+lastState.posIndex);
lastState.docIndex.copyFrom(t.docIndex, false);
lastState.docIndex.write(indexBytesWriter, isFirstTerm);
//System.out.print(" doc=" + lastState.docIndex + " 1FP=" + indexBytesWriter.getFilePointer());
if (indexOptions != IndexOptions.DOCS_ONLY) {
lastState.freqIndex.copyFrom(t.freqIndex, false);
lastState.freqIndex.write(indexBytesWriter, isFirstTerm);
//System.out.print(" frq=" + lastState.freqIndex + " 2FP=" + indexBytesWriter.getFilePointer());
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
lastState.posIndex.copyFrom(t.posIndex, false);
lastState.posIndex.write(indexBytesWriter, isFirstTerm);
//System.out.print(" pos=" + lastState.posIndex + " 3FP=" + indexBytesWriter.getFilePointer());
if (storePayloads) {
if (isFirstTerm) {
//System.out.print(" payFP=" + (t.payloadFP));
indexBytesWriter.writeVLong(t.payloadFP);
} else {
//System.out.print(" payFP=" + (t.payloadFP - lastPayloadFP));
indexBytesWriter.writeVLong(t.payloadFP - lastPayloadFP);
}
lastPayloadFP = t.payloadFP;
lastState.docIndex.copyFrom(state.docIndex, false);
lastState.docIndex.write(out, absolute);
if (indexOptions != IndexOptions.DOCS_ONLY) {
lastState.freqIndex.copyFrom(state.freqIndex, false);
lastState.freqIndex.write(out, absolute);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
lastState.posIndex.copyFrom(state.posIndex, false);
lastState.posIndex.write(out, absolute);
if (storePayloads) {
if (absolute) {
out.writeVLong(state.payloadFP);
} else {
out.writeVLong(state.payloadFP - lastPayloadFP);
}
lastPayloadFP = state.payloadFP;
}
}
if (t.skipFP != -1) {
if (isFirstTerm) {
//System.out.print(" a.skipFP=" + (t.skipFP));
indexBytesWriter.writeVLong(t.skipFP);
} else {
//System.out.print(" b.skipFP=" + (t.skipFP - lastSkipFP));
indexBytesWriter.writeVLong(t.skipFP - lastSkipFP);
}
lastSkipFP = t.skipFP;
}
//System.out.println();
//System.out.println(" last(copy): doc="+lastState.docIndex +" frq=" + lastState.freqIndex+" pos="+lastState.posIndex);
isFirstTerm = false;
}
termsOut.writeVLong((int) indexBytesWriter.getFilePointer());
indexBytesWriter.writeTo(termsOut);
indexBytesWriter.reset();
slice.clear();
if (state.skipFP != -1) {
if (absolute) {
out.writeVLong(state.skipFP);
} else {
out.writeVLong(state.skipFP - lastSkipFP);
}
lastSkipFP = state.skipFP;
}
}
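
With flushTermsBlock gone, SepPostingsWriter delta-codes its file pointers term by term inside encodeTerm: when the terms dictionary passes absolute=true (the first term of a block, or right after setField) the running lastPayloadFP/lastSkipFP baselines are reset, and skipFP == -1 still means "this term has no skip data, write nothing". A compact sketch of just that pointer handling; SepPointerEncoder and encodePointers are hypothetical names, and fixed-width longs stand in for VLongs.

  import java.io.DataOutputStream;
  import java.io.IOException;

  class SepPointerEncoder {
    private long lastPayloadFP;
    private long lastSkipFP;

    // Called once per term; absolute == true for the first term of each block
    // (and after setField), so the deltas restart there.
    void encodePointers(DataOutputStream out, long payloadFP, long skipFP,
                        boolean absolute) throws IOException {
      if (absolute) {
        lastPayloadFP = 0;
        lastSkipFP = 0;
      }
      out.writeLong(absolute ? payloadFP : payloadFP - lastPayloadFP);  // VLong in the real format
      lastPayloadFP = payloadFP;

      if (skipFP != -1) {               // only terms with enough docs carry skip data
        out.writeLong(absolute ? skipFP : skipFP - lastSkipFP);
        lastSkipFP = skipFP;
      }
      // When skipFP == -1 nothing is written; the reader above resets its skipFP
      // to 0 whenever it decodes an absolute term that has no skip entry.
    }
  }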
@Override

BlockTreeTermsReader.java

@ -158,6 +158,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
final long sumDocFreq = in.readVLong();
final int docCount = in.readVInt();
final int longsSize = in.readVInt();
if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
}
@ -168,7 +169,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
}
final long indexStartFP = indexIn.readVLong();
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
}
@ -448,11 +449,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
final long indexStartFP;
final long rootBlockFP;
final BytesRef rootCode;
private final FST<BytesRef> index;
final int longsSize;
private final FST<BytesRef> index;
//private boolean DEBUG;
FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, IndexInput indexIn) throws IOException {
FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize, IndexInput indexIn) throws IOException {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
//DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
@ -462,6 +464,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
this.docCount = docCount;
this.indexStartFP = indexStartFP;
this.rootCode = rootCode;
this.longsSize = longsSize;
// if (DEBUG) {
// System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
// }
@ -612,6 +615,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
FST.Arc<BytesRef> arc;
final BlockTermState termState;
// metadata buffer, holding monotonic values
public long[] longs;
// metadata buffer, holding general values
public byte[] bytes;
ByteArrayDataInput bytesReader;
// Cumulative output so far
BytesRef outputPrefix;
@ -621,8 +630,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
public Frame(int ord) throws IOException {
this.ord = ord;
termState = postingsReader.newTermState();
termState.totalTermFreq = -1;
this.termState = postingsReader.newTermState();
this.termState.totalTermFreq = -1;
this.longs = new long[longsSize];
}
void loadNextFloorBlock() throws IOException {
@ -720,8 +730,17 @@ public class BlockTreeTermsReader extends FieldsProducer {
termState.termBlockOrd = 0;
nextEnt = 0;
postingsReader.readTermsBlock(in, fieldInfo, termState);
// metadata
numBytes = in.readVInt();
if (bytes == null) {
bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
bytesReader = new ByteArrayDataInput();
} else if (bytes.length < numBytes) {
bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
}
in.readBytes(bytes, 0, numBytes);
bytesReader.reset(bytes, 0, numBytes);
if (!isLastInFloor) {
// Sub-blocks of a single floor block are always
@ -774,12 +793,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
// lazily catch up on metadata decode:
final int limit = getTermBlockOrd();
boolean absolute = metaDataUpto == 0;
assert limit > 0;
// We must set/incr state.termCount because
// postings impl can look at this
termState.termBlockOrd = metaDataUpto;
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
@ -791,17 +807,24 @@ public class BlockTreeTermsReader extends FieldsProducer {
// TODO: if docFreq were bulk decoded we could
// just skipN here:
// stats
termState.docFreq = statsReader.readVInt();
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}
// metadata
for (int i = 0; i < longsSize; i++) {
longs[i] = bytesReader.readVLong();
}
postingsReader.decodeTerm(longs, bytesReader, fieldInfo, termState, absolute);
postingsReader.nextTerm(fieldInfo, termState);
metaDataUpto++;
termState.termBlockOrd++;
absolute = false;
}
termState.termBlockOrd = metaDataUpto;
}
}
@ -1707,6 +1730,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
}
// if (DEBUG) {
// System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
// }
@ -2290,10 +2314,17 @@ public class BlockTreeTermsReader extends FieldsProducer {
final BlockTermState state;
// metadata buffer, holding monotonic values
public long[] longs;
// metadata buffer, holding general values
public byte[] bytes;
ByteArrayDataInput bytesReader;
public Frame(int ord) throws IOException {
this.ord = ord;
state = postingsReader.newTermState();
state.totalTermFreq = -1;
this.state = postingsReader.newTermState();
this.state.totalTermFreq = -1;
this.longs = new long[longsSize];
}
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
@ -2391,7 +2422,17 @@ public class BlockTreeTermsReader extends FieldsProducer {
// TODO: we could skip this if !hasTerms; but
// that's rare so won't help much
postingsReader.readTermsBlock(in, fieldInfo, state);
// metadata
numBytes = in.readVInt();
if (bytes == null) {
bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
bytesReader = new ByteArrayDataInput();
} else if (bytes.length < numBytes) {
bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
}
in.readBytes(bytes, 0, numBytes);
bytesReader.reset(bytes, 0, numBytes);
// Sub-blocks of a single floor block are always
// written one after another -- tail recurse:
@ -2575,12 +2616,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
// lazily catch up on metadata decode:
final int limit = getTermBlockOrd();
boolean absolute = metaDataUpto == 0;
assert limit > 0;
// We must set/incr state.termCount because
// postings impl can look at this
state.termBlockOrd = metaDataUpto;
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
@ -2592,17 +2630,24 @@ public class BlockTreeTermsReader extends FieldsProducer {
// TODO: if docFreq were bulk decoded we could
// just skipN here:
// stats
state.docFreq = statsReader.readVInt();
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
state.totalTermFreq = state.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}
// metadata
for (int i = 0; i < longsSize; i++) {
longs[i] = bytesReader.readVLong();
}
postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute);
postingsReader.nextTerm(fieldInfo, state);
metaDataUpto++;
state.termBlockOrd++;
absolute = false;
}
state.termBlockOrd = metaDataUpto;
}
// Used only by assert

BlockTreeTermsWriter.java

@ -104,13 +104,12 @@ import org.apache.lucene.util.packed.PackedInts;
* and decoding the Postings Metadata and Term Metadata sections.</p>
*
* <ul>
* <!-- TODO: expand on this, its not really correct and doesnt explain sub-blocks etc -->
* <li>TermsDict (.tim) --&gt; Header, <i>Postings Metadata</i>, Block<sup>NumBlocks</sup>,
* <li>TermsDict (.tim) --&gt; Header, <i>Postings Header</i>, NodeBlock<sup>NumBlocks</sup>,
* FieldSummary, DirOffset</li>
* <li>Block --&gt; SuffixBlock, StatsBlock, MetadataBlock</li>
* <li>SuffixBlock --&gt; EntryCount, SuffixLength, Byte<sup>SuffixLength</sup></li>
* <li>StatsBlock --&gt; StatsLength, &lt;DocFreq, TotalTermFreq&gt;<sup>EntryCount</sup></li>
* <li>MetadataBlock --&gt; MetaLength, &lt;<i>Term Metadata</i>&gt;<sup>EntryCount</sup></li>
* <li>NodeBlock --&gt; (OuterNode | InnerNode)</li>
* <li>OuterNode --&gt; EntryCount, SuffixLength, Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>Term Metadata</i>&gt;<sup>EntryCount</sup></li>
* <li>InnerNode --&gt; EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats ? &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>Term Metadata ? </i>&gt;<sup>EntryCount</sup></li>
* <li>TermStats --&gt; DocFreq, TotalTermFreq </li>
* <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, RootCodeLength, Byte<sup>RootCodeLength</sup>,
* SumDocFreq, DocCount&gt;<sup>NumFields</sup></li>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
@ -136,7 +135,9 @@ import org.apache.lucene.util.packed.PackedInts;
* <li>DocCount is the number of documents that have at least one posting for this field.</li>
* <li>PostingsMetadata and TermMetadata are plugged into by the specific postings implementation:
* these contain arbitrary per-file data (such as parameters or versioning information)
* and per-term data (such as pointers to inverted files).
* and per-term data (such as pointers to inverted files).</li>
* <li>For inner nodes of the tree, every entry steals one bit to mark whether it points
* to a child node (sub-block); if so, the corresponding TermStats and TermMetaData are omitted (see the sketch after this list).</li>
* </ul>
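
A small worked example of the bit-stealing mentioned in the last bullet (it corresponds to the writeBlock changes further down): in an inner node each entry's suffix length is shifted left by one and the low bit records whether the entry is a term (0) or a pointer to a sub-block (1); sub-block entries then carry the delta from the enclosing block's start file pointer instead of TermStats and metadata. The helper names below are hypothetical, and plain ints stand in for the VInts actually written.

  public class SuffixCodeSketch {

    // Inner-node entry for a term: low bit 0, then the suffix bytes, stats, metadata.
    static int termCode(int suffixLength) {
      return suffixLength << 1;
    }

    // Inner-node entry for a sub-block: low bit 1, then the suffix bytes and the
    // (startFP - subBlockFP) delta; its stats and metadata live in the sub-block.
    static int subBlockCode(int suffixLength) {
      return (suffixLength << 1) | 1;
    }

    public static void main(String[] args) {
      int code = subBlockCode(5);
      int suffixLength = code >>> 1;          // 5
      boolean isSubBlock = (code & 1) != 0;   // true
      System.out.println(suffixLength + " " + isSubBlock);
    }
  }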
* <a name="Termindex" id="Termindex"></a>
* <h3>Term Index</h3>
@ -237,8 +238,9 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
public final long sumTotalTermFreq;
public final long sumDocFreq;
public final int docCount;
private final int longsSize;
public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount) {
public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms;
@ -248,6 +250,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.longsSize = longsSize;
}
}
@ -300,7 +303,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
// System.out.println("BTW.init seg=" + state.segmentName);
postingsWriter.start(out); // have consumer write its format/header
postingsWriter.init(out); // have consumer write its format/header
success = true;
} finally {
if (!success) {
@ -354,12 +357,13 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
private static final class PendingTerm extends PendingEntry {
public final BytesRef term;
public final TermStats stats;
// stats + metadata
public final BlockTermState state;
public PendingTerm(BytesRef term, TermStats stats) {
public PendingTerm(BytesRef term, BlockTermState state) {
super(true);
this.term = term;
this.stats = stats;
this.state = state;
}
@Override
@ -480,6 +484,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
class TermsWriter extends TermsConsumer {
private final FieldInfo fieldInfo;
private final int longsSize;
private long numTerms;
long sumTotalTermFreq;
long sumDocFreq;
@ -839,11 +844,16 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
final List<FST<BytesRef>> subIndices;
int termCount;
long[] longs = new long[longsSize];
boolean absolute = true;
if (isLeafBlock) {
subIndices = null;
for (PendingEntry ent : slice) {
assert ent.isTerm;
PendingTerm term = (PendingTerm) ent;
BlockTermState state = term.state;
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(suffix);
@ -852,15 +862,25 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
// System.out.println(" write term suffix=" + suffixBytes);
// }
// For leaf block we write suffix straight
bytesWriter.writeVInt(suffix);
bytesWriter.writeBytes(term.term.bytes, prefixLength, suffix);
suffixWriter.writeVInt(suffix);
suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);
// Write term stats, to separate byte[] blob:
bytesWriter2.writeVInt(term.stats.docFreq);
statsWriter.writeVInt(state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
assert term.stats.totalTermFreq >= term.stats.docFreq: term.stats.totalTermFreq + " vs " + term.stats.docFreq;
bytesWriter2.writeVLong(term.stats.totalTermFreq - term.stats.docFreq);
assert state.totalTermFreq >= state.docFreq: state.totalTermFreq + " vs " + state.docFreq;
statsWriter.writeVLong(state.totalTermFreq - state.docFreq);
}
// Write term meta data
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
assert longs[pos] >= 0;
metaWriter.writeVLong(longs[pos]);
}
bytesWriter.writeTo(metaWriter);
bytesWriter.reset();
absolute = false;
}
termCount = length;
} else {
@ -869,6 +889,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
for (PendingEntry ent : slice) {
if (ent.isTerm) {
PendingTerm term = (PendingTerm) ent;
BlockTermState state = term.state;
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(suffix);
@ -878,16 +899,34 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
// }
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block
bytesWriter.writeVInt(suffix<<1);
bytesWriter.writeBytes(term.term.bytes, prefixLength, suffix);
suffixWriter.writeVInt(suffix<<1);
suffixWriter.writeBytes(term.term.bytes, prefixLength, suffix);
// Write term stats, to separate byte[] blob:
bytesWriter2.writeVInt(term.stats.docFreq);
statsWriter.writeVInt(state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
assert term.stats.totalTermFreq >= term.stats.docFreq;
bytesWriter2.writeVLong(term.stats.totalTermFreq - term.stats.docFreq);
assert state.totalTermFreq >= state.docFreq;
statsWriter.writeVLong(state.totalTermFreq - state.docFreq);
}
// TODO: now that terms dict "sees" these longs,
// we can explore better column-stride encodings
// to encode all long[0]s for this block at
// once, all long[1]s, etc., e.g. using
// Simple64. Alternatively, we could interleave
// stats + meta ... no reason to have them
// separate anymore:
// Write term meta data
postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
for (int pos = 0; pos < longsSize; pos++) {
assert longs[pos] >= 0;
metaWriter.writeVLong(longs[pos]);
}
bytesWriter.writeTo(metaWriter);
bytesWriter.reset();
absolute = false;
termCount++;
} else {
PendingBlock block = (PendingBlock) ent;
@ -897,8 +936,8 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block
bytesWriter.writeVInt((suffix<<1)|1);
bytesWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
suffixWriter.writeVInt((suffix<<1)|1);
suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
assert block.fp < startFP;
// if (DEBUG) {
@ -908,7 +947,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
// System.out.println(" write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// }
bytesWriter.writeVLong(startFP - block.fp);
suffixWriter.writeVLong(startFP - block.fp);
subIndices.add(block.index);
}
}
@ -921,17 +960,19 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
// search on lookup
// Write suffixes byte[] blob to terms dict output:
out.writeVInt((int) (bytesWriter.getFilePointer() << 1) | (isLeafBlock ? 1:0));
bytesWriter.writeTo(out);
bytesWriter.reset();
out.writeVInt((int) (suffixWriter.getFilePointer() << 1) | (isLeafBlock ? 1:0));
suffixWriter.writeTo(out);
suffixWriter.reset();
// Write term stats byte[] blob
out.writeVInt((int) bytesWriter2.getFilePointer());
bytesWriter2.writeTo(out);
bytesWriter2.reset();
out.writeVInt((int) statsWriter.getFilePointer());
statsWriter.writeTo(out);
statsWriter.reset();
// Have postings writer write block
postingsWriter.flushTermsBlock(futureTermCount+termCount, termCount);
// Write term meta data byte[] blob
out.writeVInt((int) metaWriter.getFilePointer());
metaWriter.writeTo(out);
metaWriter.reset();
// Remove slice replaced by block:
slice.clear();
@ -967,7 +1008,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
PackedInts.COMPACT,
true, 15);
postingsWriter.setField(fieldInfo);
this.longsSize = postingsWriter.setField(fieldInfo);
}
@Override
@ -998,8 +1039,13 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
//if (DEBUG) System.out.println("BTTW.finishTerm term=" + fieldInfo.name + ":" + toString(text) + " seg=" + segment + " df=" + stats.docFreq);
blockBuilder.add(Util.toIntsRef(text, scratchIntsRef), noOutputs.getNoOutput());
pending.add(new PendingTerm(BytesRef.deepCopyOf(text), stats));
postingsWriter.finishTerm(stats);
BlockTermState state = postingsWriter.newTermState();
state.docFreq = stats.docFreq;
state.totalTermFreq = stats.totalTermFreq;
postingsWriter.finishTerm(state);
PendingTerm term = new PendingTerm(BytesRef.deepCopyOf(text), state);
pending.add(term);
numTerms++;
}
@ -1038,7 +1084,8 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
indexStartFP,
sumTotalTermFreq,
sumDocFreq,
docCount));
docCount,
longsSize));
} else {
assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
assert sumDocFreq == 0;
@ -1046,8 +1093,10 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
}
}
private final RAMOutputStream suffixWriter = new RAMOutputStream();
private final RAMOutputStream statsWriter = new RAMOutputStream();
private final RAMOutputStream metaWriter = new RAMOutputStream();
private final RAMOutputStream bytesWriter = new RAMOutputStream();
private final RAMOutputStream bytesWriter2 = new RAMOutputStream();
}
@Override
@ -1072,6 +1121,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
out.writeVInt(field.longsSize);
indexOut.writeVLong(field.indexStartFP);
}
writeTrailer(out, dirStart);
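To make the block layout above easier to follow, here is a minimal sketch of the three blobs BlockTreeTermsWriter now emits per block (suffixes, stats, then term metadata). It uses Lucene's RAMOutputStream/IndexOutput as in the diff, but the helper class, method name, and parameters are illustrative, not part of the writer:

import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;

final class BlockLayoutSketch {
  // Write one term block in the order the diff above establishes.
  static void writeBlock(IndexOutput out, boolean isLeafBlock,
                         RAMOutputStream suffixWriter,
                         RAMOutputStream statsWriter,
                         RAMOutputStream metaWriter) throws IOException {
    // Suffixes blob; the low bit records whether the block is a leaf:
    out.writeVInt(((int) (suffixWriter.getFilePointer() << 1)) | (isLeafBlock ? 1 : 0));
    suffixWriter.writeTo(out);
    suffixWriter.reset();
    // Stats blob (per-term docFreq and totalTermFreq-docFreq):
    out.writeVInt((int) statsWriter.getFilePointer());
    statsWriter.writeTo(out);
    statsWriter.reset();
    // Metadata blob (longsSize vlongs per term plus encodeTerm's byte[] payload):
    out.writeVInt((int) metaWriter.getFilePointer());
    metaWriter.writeTo(out);
    metaWriter.reset();
  }
}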

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.Bits;
/** The core terms dictionaries (BlockTermsReader,
@ -55,7 +56,7 @@ public abstract class PostingsReaderBase implements Closeable {
public abstract BlockTermState newTermState() throws IOException;
/** Actually decode metadata for the next term. */
public abstract void nextTerm(FieldInfo fieldInfo, BlockTermState state) throws IOException;
public abstract void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
/** Must fully consume state, since after this call that
* TermState may be reused. */
@ -68,9 +69,4 @@ public abstract class PostingsReaderBase implements Closeable {
@Override
public abstract void close() throws IOException;
/** Reads data for all terms in the next block; this
* method should merely load the byte[] blob but not
* decode, which is done in {@link #nextTerm}. */
public abstract void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState termState) throws IOException;
}
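A minimal sketch of the consumer side of decodeTerm, assuming the terms dictionary has already loaded a block's metadata blob into a ByteArrayDataInput and sized the longs array from the writer's setField return value; the helper class and names are illustrative:

import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.store.ByteArrayDataInput;

final class TermsDictReadSketch {
  static void decodeOneTerm(PostingsReaderBase postingsReader, FieldInfo fieldInfo,
                            BlockTermState state, ByteArrayDataInput metaReader,
                            long[] longs, boolean absolute) throws IOException {
    // The terms dictionary owns the vlong-coded longs; the postings reader
    // interprets them (plus whatever bytes follow) in decodeTerm.
    for (int pos = 0; pos < longs.length; pos++) {
      longs[pos] = metaReader.readVLong();
    }
    postingsReader.decodeTerm(longs, metaReader, fieldInfo, state, absolute);
  }
}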

View File

@ -20,6 +20,7 @@ package org.apache.lucene.codecs;
import java.io.IOException;
import java.io.Closeable;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.index.FieldInfo;
@ -48,25 +49,31 @@ public abstract class PostingsWriterBase extends PostingsConsumer implements Clo
/** Called once after startup, before any terms have been
* added. Implementations typically write a header to
* the provided {@code termsOut}. */
public abstract void start(IndexOutput termsOut) throws IOException;
public abstract void init(IndexOutput termsOut) throws IOException;
/** Return a newly created empty TermState */
public abstract BlockTermState newTermState() throws IOException;
/** Start a new term. Note that a matching call to {@link
* #finishTerm(TermStats)} is done, only if the term has at least one
* #finishTerm(BlockTermState)} is made only if the term has at least one
* document. */
public abstract void startTerm() throws IOException;
/** Flush count terms starting at start "backwards", as a
* block. start is a negative offset from the end of the
* terms stack, ie bigger start means further back in
* the stack. */
public abstract void flushTermsBlock(int start, int count) throws IOException;
/** Finishes the current term. The provided {@link
* TermStats} contains the term's summary statistics. */
public abstract void finishTerm(TermStats stats) throws IOException;
* BlockTermState} contains the term's summary statistics,
* and will hold metadata from the PBF (postings base format) when returned */
public abstract void finishTerm(BlockTermState state) throws IOException;
/** Called when the writing switches to another field. */
public abstract void setField(FieldInfo fieldInfo);
/**
* Encode metadata as long[] and byte[]. {@code absolute} controls
* whether the current term's metadata is written absolutely or as a delta from the previous term.
*/
public abstract void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException;
/**
* Return the fixed number of longs in each term's metadata array;
* called when the writing switches to another field. */
public abstract int setField(FieldInfo fieldInfo);
@Override
public abstract void close() throws IOException;
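A minimal sketch of how a terms dictionary might drive this revised writer contract: setField sizes the longs array, and encodeTerm fills it (plus an opaque byte[] payload) per term. The helper class, metaWriter, and finishedTerms are illustrative stand-ins, not real Lucene classes:

import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.store.RAMOutputStream;

final class TermsDictWriteSketch {
  static void writeFieldTerms(PostingsWriterBase postingsWriter, FieldInfo fieldInfo,
                              Iterable<BlockTermState> finishedTerms,
                              RAMOutputStream metaWriter) throws IOException {
    final int longsSize = postingsWriter.setField(fieldInfo);   // fixed longs per term
    final long[] longs = new long[longsSize];
    final RAMOutputStream bytesWriter = new RAMOutputStream();  // scratch for the byte[] part
    boolean absolute = true;                                    // first term of a block is absolute
    for (BlockTermState state : finishedTerms) {
      postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute);
      for (int pos = 0; pos < longsSize; pos++) {
        metaWriter.writeVLong(longs[pos]);                      // monotonic deltas, always >= 0
      }
      bytesWriter.writeTo(metaWriter);                          // term's opaque byte[] payload
      bytesWriter.reset();
      absolute = false;                                         // remaining terms are delta-coded
    }
  }
}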

View File

@ -32,6 +32,7 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -121,11 +122,6 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
long proxOffset;
long skipOffset;
// Only used by the "primary" TermState -- clones don't
// copy this (basically they are "transient"):
ByteArrayDataInput bytesReader; // TODO: should this NOT be in the TermState...?
byte[] bytes;
@Override
public StandardTermState clone() {
StandardTermState other = new StandardTermState();
@ -140,11 +136,6 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
freqOffset = other.freqOffset;
proxOffset = other.proxOffset;
skipOffset = other.skipOffset;
// Do not copy bytes, bytesReader (else TermState is
// very heavy, ie drags around the entire block's
// byte[]). On seek back, if next() is in fact used
// (rare!), they will be re-read from disk.
}
@Override
@ -171,38 +162,18 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
}
/* Reads but does not decode the byte[] blob holding
metadata for the current terms block */
@Override
public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
final StandardTermState termState = (StandardTermState) _termState;
final int len = termsIn.readVInt();
// if (DEBUG) System.out.println(" SPR.readTermsBlock bytes=" + len + " ts=" + _termState);
if (termState.bytes == null) {
termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
termState.bytesReader = new ByteArrayDataInput();
} else if (termState.bytes.length < len) {
termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
}
termsIn.readBytes(termState.bytes, 0, len);
termState.bytesReader.reset(termState.bytes, 0, len);
}
@Override
public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState)
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException {
final StandardTermState termState = (StandardTermState) _termState;
// if (DEBUG) System.out.println("SPR: nextTerm seg=" + segment + " tbOrd=" + termState.termBlockOrd + " bytesReader.fp=" + termState.bytesReader.getPosition());
final boolean isFirstTerm = termState.termBlockOrd == 0;
if (isFirstTerm) {
termState.freqOffset = termState.bytesReader.readVLong();
} else {
termState.freqOffset += termState.bytesReader.readVLong();
if (absolute) {
termState.freqOffset = 0;
termState.proxOffset = 0;
}
termState.freqOffset += in.readVLong();
/*
if (DEBUG) {
System.out.println(" dF=" + termState.docFreq);
@ -212,7 +183,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
assert termState.freqOffset < freqIn.length();
if (termState.docFreq >= skipMinimum) {
termState.skipOffset = termState.bytesReader.readVLong();
termState.skipOffset = in.readVLong();
// if (DEBUG) System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
assert termState.freqOffset + termState.skipOffset < freqIn.length();
} else {
@ -220,11 +191,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
if (isFirstTerm) {
termState.proxOffset = termState.bytesReader.readVLong();
} else {
termState.proxOffset += termState.bytesReader.readVLong();
}
termState.proxOffset += in.readVLong();
// if (DEBUG) System.out.println(" proxFP=" + termState.proxOffset);
}
}

View File

@ -152,11 +152,6 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
// freq is always implicitly totalTermFreq in this case.
int singletonDocID;
// Only used by the "primary" TermState -- clones don't
// copy this (basically they are "transient"):
ByteArrayDataInput bytesReader; // TODO: should this NOT be in the TermState...?
byte[] bytes;
@Override
public IntBlockTermState clone() {
IntBlockTermState other = new IntBlockTermState();
@ -174,11 +169,6 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
lastPosBlockOffset = other.lastPosBlockOffset;
skipOffset = other.skipOffset;
singletonDocID = other.singletonDocID;
// Do not copy bytes, bytesReader (else TermState is
// very heavy, ie drags around the entire block's
// byte[]). On seek back, if next() is in fact used
// (rare!), they will be re-read from disk.
}
@Override
@ -197,78 +187,37 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
IOUtils.close(docIn, posIn, payIn);
}
/* Reads but does not decode the byte[] blob holding
metadata for the current terms block */
@Override
public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
final IntBlockTermState termState = (IntBlockTermState) _termState;
final int numBytes = termsIn.readVInt();
if (termState.bytes == null) {
termState.bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
termState.bytesReader = new ByteArrayDataInput();
} else if (termState.bytes.length < numBytes) {
termState.bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
}
termsIn.readBytes(termState.bytes, 0, numBytes);
termState.bytesReader.reset(termState.bytes, 0, numBytes);
}
@Override
public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState)
public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException {
final IntBlockTermState termState = (IntBlockTermState) _termState;
final boolean isFirstTerm = termState.termBlockOrd == 0;
final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
final boolean fieldHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
final boolean fieldHasPayloads = fieldInfo.hasPayloads();
final DataInput in = termState.bytesReader;
if (isFirstTerm) {
if (termState.docFreq == 1) {
termState.singletonDocID = in.readVInt();
termState.docStartFP = 0;
} else {
termState.singletonDocID = -1;
termState.docStartFP = in.readVLong();
}
if (fieldHasPositions) {
termState.posStartFP = in.readVLong();
if (termState.totalTermFreq > BLOCK_SIZE) {
termState.lastPosBlockOffset = in.readVLong();
} else {
termState.lastPosBlockOffset = -1;
}
if ((fieldHasPayloads || fieldHasOffsets) && termState.totalTermFreq >= BLOCK_SIZE) {
termState.payStartFP = in.readVLong();
} else {
termState.payStartFP = -1;
}
// nocommit: use old version
if (absolute) {
termState.docStartFP = 0;
termState.posStartFP = 0;
termState.payStartFP = 0;
}
termState.docStartFP += longs[0];
if (fieldHasPositions) {
termState.posStartFP += longs[1];
if (fieldHasOffsets || fieldHasPayloads) {
termState.payStartFP += longs[2];
}
}
if (termState.docFreq == 1) {
termState.singletonDocID = in.readVInt();
} else {
if (termState.docFreq == 1) {
termState.singletonDocID = in.readVInt();
termState.singletonDocID = -1;
}
if (fieldHasPositions) {
if (termState.totalTermFreq > BLOCK_SIZE) {
termState.lastPosBlockOffset = in.readVLong();
} else {
termState.singletonDocID = -1;
termState.docStartFP += in.readVLong();
}
if (fieldHasPositions) {
termState.posStartFP += in.readVLong();
if (termState.totalTermFreq > BLOCK_SIZE) {
termState.lastPosBlockOffset = in.readVLong();
} else {
termState.lastPosBlockOffset = -1;
}
if ((fieldHasPayloads || fieldHasOffsets) && termState.totalTermFreq >= BLOCK_SIZE) {
long delta = in.readVLong();
if (termState.payStartFP == -1) {
termState.payStartFP = delta;
} else {
termState.payStartFP += delta;
}
}
termState.lastPosBlockOffset = -1;
}
}

View File

@ -25,14 +25,15 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
@ -71,7 +72,8 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
final IndexOutput posOut;
final IndexOutput payOut;
private IndexOutput termsOut;
final static IntBlockTermState emptyState = new IntBlockTermState();
IntBlockTermState lastState;
// How current field indexes postings:
private boolean fieldHasFreqs;
@ -79,7 +81,7 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
private boolean fieldHasOffsets;
private boolean fieldHasPayloads;
// Holds starting file pointers for each term:
// Holds starting file pointers for current term:
private long docTermStartFP;
private long posTermStartFP;
private long payTermStartFP;
@ -188,21 +190,50 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
this(state, PackedInts.COMPACT);
}
private final static class IntBlockTermState extends BlockTermState {
long docTermStartFP = 0;
long posTermStartFP = 0;
long payTermStartFP = 0;
long skipOffset = -1;
long lastPosBlockOffset = -1;
int singletonDocID = -1;
@Override
public String toString() {
return super.toString() + " docStartFP=" + docTermStartFP + " posStartFP=" + posTermStartFP + " payStartFP=" + payTermStartFP + " lastPosBlockOffset=" + lastPosBlockOffset + " singletonDocID=" + singletonDocID;
}
}
@Override
public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
public IntBlockTermState newTermState() {
return new IntBlockTermState();
}
@Override
public void init(IndexOutput termsOut) throws IOException {
CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
termsOut.writeVInt(BLOCK_SIZE);
}
// nocommit better name?
@Override
public void setField(FieldInfo fieldInfo) {
public int setField(FieldInfo fieldInfo) {
IndexOptions indexOptions = fieldInfo.getIndexOptions();
fieldHasFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
fieldHasPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
fieldHasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
fieldHasPayloads = fieldInfo.hasPayloads();
skipWriter.setField(fieldHasPositions, fieldHasOffsets, fieldHasPayloads);
lastState = emptyState;
if (fieldHasPositions) {
if (fieldHasPayloads || fieldHasOffsets) {
return 3; // doc + pos + pay FP
} else {
return 2; // doc + pos FP
}
} else {
return 1; // doc FP
}
}
@Override
@ -348,37 +379,18 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
}
}
private static class PendingTerm {
public final long docStartFP;
public final long posStartFP;
public final long payStartFP;
public final long skipOffset;
public final long lastPosBlockOffset;
public final int singletonDocID;
public PendingTerm(long docStartFP, long posStartFP, long payStartFP, long skipOffset, long lastPosBlockOffset, int singletonDocID) {
this.docStartFP = docStartFP;
this.posStartFP = posStartFP;
this.payStartFP = payStartFP;
this.skipOffset = skipOffset;
this.lastPosBlockOffset = lastPosBlockOffset;
this.singletonDocID = singletonDocID;
}
}
private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
/** Called when we are done adding docs to this term */
@Override
public void finishTerm(TermStats stats) throws IOException {
assert stats.docFreq > 0;
public void finishTerm(BlockTermState _state) throws IOException {
IntBlockTermState state = (IntBlockTermState) _state;
assert state.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
assert stats.docFreq == docCount: stats.docFreq + " vs " + docCount;
assert state.docFreq == docCount: state.docFreq + " vs " + docCount;
// if (DEBUG) {
// System.out.println("FPW.finishTerm docFreq=" + stats.docFreq);
// System.out.println("FPW.finishTerm docFreq=" + state.docFreq);
// }
// if (DEBUG) {
@ -389,7 +401,7 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
// docFreq == 1, don't write the single docid/freq to a separate file along with a pointer to it.
final int singletonDocID;
if (stats.docFreq == 1) {
if (state.docFreq == 1) {
// pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
singletonDocID = docDeltaBuffer[0];
} else {
@ -420,8 +432,8 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
// totalTermFreq is just total number of positions(or payloads, or offsets)
// associated with current term.
assert stats.totalTermFreq != -1;
if (stats.totalTermFreq > BLOCK_SIZE) {
assert state.totalTermFreq != -1;
if (state.totalTermFreq > BLOCK_SIZE) {
// record file offset for last pos in last block
lastPosBlockOffset = posOut.getFilePointer() - posTermStartFP;
} else {
@ -486,7 +498,7 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
}
}
// if (DEBUG) {
// System.out.println(" totalTermFreq=" + stats.totalTermFreq + " lastPosBlockOffset=" + lastPosBlockOffset);
// System.out.println(" totalTermFreq=" + state.totalTermFreq + " lastPosBlockOffset=" + lastPosBlockOffset);
// }
} else {
lastPosBlockOffset = -1;
@ -505,76 +517,48 @@ public final class Lucene41PostingsWriter extends PostingsWriterBase {
// System.out.println(" no skip: docCount=" + docCount);
// }
}
long payStartFP;
if (stats.totalTermFreq >= BLOCK_SIZE) {
payStartFP = payTermStartFP;
} else {
payStartFP = -1;
}
// if (DEBUG) {
// System.out.println(" payStartFP=" + payStartFP);
// }
pendingTerms.add(new PendingTerm(docTermStartFP, posTermStartFP, payStartFP, skipOffset, lastPosBlockOffset, singletonDocID));
state.docTermStartFP = docTermStartFP;
state.posTermStartFP = posTermStartFP;
state.payTermStartFP = payTermStartFP;
state.singletonDocID = singletonDocID;
state.skipOffset = skipOffset;
state.lastPosBlockOffset = lastPosBlockOffset;
docBufferUpto = 0;
posBufferUpto = 0;
lastDocID = 0;
docCount = 0;
}
private final RAMOutputStream bytesWriter = new RAMOutputStream();
// nocommit explain about the "don't care" values
@Override
public void flushTermsBlock(int start, int count) throws IOException {
if (count == 0) {
termsOut.writeByte((byte) 0);
return;
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
IntBlockTermState state = (IntBlockTermState)_state;
if (absolute) {
lastState = emptyState;
}
assert start <= pendingTerms.size();
assert count <= start;
final int limit = pendingTerms.size() - start + count;
long lastDocStartFP = 0;
long lastPosStartFP = 0;
long lastPayStartFP = 0;
for(int idx=limit-count; idx<limit; idx++) {
PendingTerm term = pendingTerms.get(idx);
if (term.singletonDocID == -1) {
bytesWriter.writeVLong(term.docStartFP - lastDocStartFP);
lastDocStartFP = term.docStartFP;
} else {
bytesWriter.writeVInt(term.singletonDocID);
}
if (fieldHasPositions) {
bytesWriter.writeVLong(term.posStartFP - lastPosStartFP);
lastPosStartFP = term.posStartFP;
if (term.lastPosBlockOffset != -1) {
bytesWriter.writeVLong(term.lastPosBlockOffset);
}
if ((fieldHasPayloads || fieldHasOffsets) && term.payStartFP != -1) {
bytesWriter.writeVLong(term.payStartFP - lastPayStartFP);
lastPayStartFP = term.payStartFP;
}
}
if (term.skipOffset != -1) {
bytesWriter.writeVLong(term.skipOffset);
longs[0] = state.docTermStartFP - lastState.docTermStartFP;
if (fieldHasPositions) {
longs[1] = state.posTermStartFP - lastState.posTermStartFP;
if (fieldHasPayloads || fieldHasOffsets) {
longs[2] = state.payTermStartFP - lastState.payTermStartFP;
}
}
termsOut.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(termsOut);
bytesWriter.reset();
// Remove the terms we just wrote:
pendingTerms.subList(limit-count, limit).clear();
if (state.singletonDocID != -1) {
out.writeVInt(state.singletonDocID);
}
if (fieldHasPositions) {
if (state.lastPosBlockOffset != -1) {
out.writeVLong(state.lastPosBlockOffset);
}
}
if (state.skipOffset != -1) {
out.writeVLong(state.skipOffset);
}
lastState = state;
}
@Override
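A worked example (made-up file pointers) of the absolute/delta scheme implemented above, for a positions-enabled field without payloads or offsets, where setField returned longsSize == 2:

// Hypothetical consecutive terms in one block (positions enabled, no payloads/offsets):
//   term A: docTermStartFP = 100, posTermStartFP = 40
//   term B: docTermStartFP = 160, posTermStartFP = 95
//
// encodeTerm(longs, out, field, A, absolute=true)  -> lastState reset to emptyState (all 0)
//   longs = {100 - 0, 40 - 0}    = {100, 40}
// encodeTerm(longs, out, field, B, absolute=false) -> deltas against A
//   longs = {160 - 100, 95 - 40} = {60, 55}
//
// The per-term byte[] payload (written to 'out') carries only the conditional values:
// singletonDocID when docFreq == 1, lastPosBlockOffset when != -1, skipOffset when != -1.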

View File

@ -24,6 +24,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
@ -33,6 +34,7 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;
@ -67,7 +69,6 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
*/
final int maxSkipLevels = 10;
final int totalNumDocs;
IndexOutput termsOut;
IndexOptions indexOptions;
boolean storePayloads;
@ -81,6 +82,9 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
int lastPosition;
int lastOffset;
final static StandardTermState emptyState = new StandardTermState();
StandardTermState lastState;
// private String segment;
/** Creates a {@link Lucene40PostingsWriter}, with the
@ -134,14 +138,19 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
}
@Override
public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
public void init(IndexOutput termsOut) throws IOException {
CodecUtil.writeHeader(termsOut, Lucene40PostingsReader.TERMS_CODEC, Lucene40PostingsReader.VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
public BlockTermState newTermState() {
return new StandardTermState();
}
@Override
public void startTerm() {
freqStart = freqOut.getFilePointer();
@ -159,7 +168,7 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
// Currently, this instance is re-used across fields, so
// our parent calls setField whenever the field changes
@Override
public void setField(FieldInfo fieldInfo) {
public int setField(FieldInfo fieldInfo) {
//System.out.println("SPW: setField");
/*
if (BlockTreeTermsWriter.DEBUG && fieldInfo.name.equals("id")) {
@ -173,8 +182,10 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
storePayloads = fieldInfo.hasPayloads();
lastState = emptyState;
//System.out.println(" set init blockFreqStart=" + freqStart);
//System.out.println(" set init blockProxStart=" + proxStart);
return 0;
}
int lastDocID;
@ -265,94 +276,48 @@ public final class Lucene40PostingsWriter extends PostingsWriterBase {
public void finishDoc() {
}
private static class PendingTerm {
public final long freqStart;
public final long proxStart;
public final long skipOffset;
public PendingTerm(long freqStart, long proxStart, long skipOffset) {
this.freqStart = freqStart;
this.proxStart = proxStart;
this.skipOffset = skipOffset;
}
private static class StandardTermState extends BlockTermState {
public long freqStart;
public long proxStart;
public long skipOffset;
}
private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
/** Called when we are done adding docs to this term */
@Override
public void finishTerm(TermStats stats) throws IOException {
public void finishTerm(BlockTermState _state) throws IOException {
StandardTermState state = (StandardTermState)_state;
// if (DEBUG) System.out.println("SPW: finishTerm seg=" + segment + " freqStart=" + freqStart);
assert stats.docFreq > 0;
assert state.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
assert stats.docFreq == df;
final long skipOffset;
assert state.docFreq == df;
state.freqStart = freqStart;
state.proxStart = proxStart;
if (df >= skipMinimum) {
skipOffset = skipListWriter.writeSkip(freqOut)-freqStart;
state.skipOffset = skipListWriter.writeSkip(freqOut)-freqStart;
} else {
skipOffset = -1;
state.skipOffset = -1;
}
pendingTerms.add(new PendingTerm(freqStart, proxStart, skipOffset));
lastDocID = 0;
df = 0;
}
private final RAMOutputStream bytesWriter = new RAMOutputStream();
@Override
public void flushTermsBlock(int start, int count) throws IOException {
//if (DEBUG) System.out.println("SPW: flushTermsBlock start=" + start + " count=" + count + " left=" + (pendingTerms.size()-count) + " pendingTerms.size()=" + pendingTerms.size());
if (count == 0) {
termsOut.writeByte((byte) 0);
return;
public void encodeTerm(long[] empty, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
StandardTermState state = (StandardTermState)_state;
if (absolute) {
lastState = emptyState;
}
assert start <= pendingTerms.size();
assert count <= start;
final int limit = pendingTerms.size() - start + count;
final PendingTerm firstTerm = pendingTerms.get(limit - count);
// First term in block is abs coded:
bytesWriter.writeVLong(firstTerm.freqStart);
if (firstTerm.skipOffset != -1) {
assert firstTerm.skipOffset > 0;
bytesWriter.writeVLong(firstTerm.skipOffset);
out.writeVLong(state.freqStart - lastState.freqStart);
if (state.skipOffset != -1) {
assert state.skipOffset > 0;
out.writeVLong(state.skipOffset);
}
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
bytesWriter.writeVLong(firstTerm.proxStart);
out.writeVLong(state.proxStart - lastState.proxStart);
}
long lastFreqStart = firstTerm.freqStart;
long lastProxStart = firstTerm.proxStart;
for(int idx=limit-count+1; idx<limit; idx++) {
final PendingTerm term = pendingTerms.get(idx);
//if (DEBUG) System.out.println(" write term freqStart=" + term.freqStart);
// The rest of the terms term are delta coded:
bytesWriter.writeVLong(term.freqStart - lastFreqStart);
lastFreqStart = term.freqStart;
if (term.skipOffset != -1) {
assert term.skipOffset > 0;
bytesWriter.writeVLong(term.skipOffset);
}
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
bytesWriter.writeVLong(term.proxStart - lastProxStart);
lastProxStart = term.proxStart;
}
}
termsOut.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(termsOut);
bytesWriter.reset();
// Remove the terms we just wrote:
pendingTerms.subList(limit-count, limit).clear();
lastState = state;
}
@Override
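For contrast with Lucene41, Lucene40's setField returns 0, so nothing is placed in longs and all metadata is vlong-coded into the byte[] stream; a worked example with made-up values:

// Lucene40: longsSize == 0, so encodeTerm ignores 'empty' and writes vlongs to 'out':
//   term A (absolute): freqStart = 500, proxStart = 300, skipOffset = -1
//     out: writeVLong(500 - 0), writeVLong(300 - 0)
//   term B (delta):    freqStart = 620, proxStart = 410, skipOffset = 25
//     out: writeVLong(620 - 500), writeVLong(25), writeVLong(410 - 300)
// (skipOffset is written only when != -1, between the freq and prox deltas.)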

View File

@ -183,12 +183,37 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
}
postingsWriter = new PulsingPostingsWriter(totTFCutoff, postingsWriter);
postingsWriter = new PulsingPostingsWriter(state, totTFCutoff, postingsWriter);
}
final FieldsConsumer fields;
final int t1 = random.nextInt(2);
if (random.nextBoolean()) {
/*
final int t1 = random.nextInt(4);
if (t1 == 0) {
boolean success = false;
try {
fields = new TempFSTTermsWriter(state, postingsWriter);
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
} else if (t1 == 1) {
boolean success = false;
try {
fields = new TempFSTOrdTermsWriter(state, postingsWriter);
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
} else if (t1 == 2) {
*/
if (t1 == 0) {
// Use BlockTree terms dict
if (LuceneTestCase.VERBOSE) {
@ -322,12 +347,36 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
}
postingsReader = new PulsingPostingsReader(postingsReader);
postingsReader = new PulsingPostingsReader(state, postingsReader);
}
final FieldsProducer fields;
if (random.nextBoolean()) {
final int t1 = random.nextInt(2);
/*
final int t1 = random.nextInt(4);
if (t1 == 0) {
boolean success = false;
try {
fields = new TempFSTTermsReader(state, postingsReader);
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
} else if (t1 == 1) {
boolean success = false;
try {
fields = new TempFSTOrdTermsReader(state, postingsReader);
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
} else if (t1 == 2) {
*/
if (t1 == 0) {
// Use BlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading BlockTree terms dict");

View File

@ -57,8 +57,8 @@ public final class NestedPulsingPostingsFormat extends PostingsFormat {
try {
docsWriter = new Lucene41PostingsWriter(state);
pulsingWriterInner = new PulsingPostingsWriter(2, docsWriter);
pulsingWriter = new PulsingPostingsWriter(1, pulsingWriterInner);
pulsingWriterInner = new PulsingPostingsWriter(state, 2, docsWriter);
pulsingWriter = new PulsingPostingsWriter(state, 1, pulsingWriterInner);
FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter,
BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
success = true;
@ -78,8 +78,8 @@ public final class NestedPulsingPostingsFormat extends PostingsFormat {
boolean success = false;
try {
docsReader = new Lucene41PostingsReader(state.directory, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
pulsingReaderInner = new PulsingPostingsReader(docsReader);
pulsingReader = new PulsingPostingsReader(pulsingReaderInner);
pulsingReaderInner = new PulsingPostingsReader(state, docsReader);
pulsingReader = new PulsingPostingsReader(state, pulsingReaderInner);
FieldsProducer ret = new BlockTreeTermsReader(
state.directory, state.fieldInfos, state.segmentInfo,
pulsingReader,

View File

@ -169,7 +169,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
final PostingsFormat format;
if ("random".equals(TEST_POSTINGSFORMAT)) {
format = PostingsFormat.forName("Lucene41");
} else if ("TempRandom".equals(TEST_POSTINGSFORMAT)) {
} else if ("MockRandom".equals(TEST_POSTINGSFORMAT)) {
format = new MockRandomPostingsFormat(new Random(random.nextLong()));
} else {
format = PostingsFormat.forName(TEST_POSTINGSFORMAT);