LUCENE-3580: fix postingsreaders to obey DISI contract

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1203190 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-11-17 13:58:59 +00:00
parent 87ac7de453
commit 4597d97e68
7 changed files with 134 additions and 65 deletions

View File

@ -994,6 +994,7 @@ public class Lucene3xFields extends FieldsProducer {
public PreDocsEnum reset(SegmentTermEnum termEnum, Bits liveDocs) throws IOException {
docs.setLiveDocs(liveDocs);
docs.seek(termEnum);
docID = -1;
return this;
}
@ -1050,6 +1051,7 @@ public class Lucene3xFields extends FieldsProducer {
public DocsAndPositionsEnum reset(SegmentTermEnum termEnum, Bits liveDocs) throws IOException {
pos.setLiveDocs(liveDocs);
pos.seek(termEnum);
docID = -1;
return this;
}

View File

@ -273,7 +273,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
int limit; // number of docs in this posting
int ord; // how many docs we've read
int doc; // doc we last read
int doc = -1; // doc we last read
int accum; // accumulator for doc deltas
int freq; // freq we last read
Bits liveDocs;
@ -306,7 +307,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
limit = termState.docFreq;
assert limit > 0;
ord = 0;
doc = 0;
doc = -1;
accum = 0;
// if (DEBUG) System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset);
skipped = false;
@ -329,9 +331,9 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
final int code = freqIn.readVInt();
// if (DEBUG) System.out.println(" code=" + code);
if (omitTF) {
doc += code;
accum += code;
} else {
doc += code >>> 1; // shift off low bit
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
@ -339,13 +341,13 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
}
if (liveDocs == null || liveDocs.get(doc)) {
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
//if (DEBUG) System.out.println(" stpr.nextDoc return doc=" + doc);
return doc;
return (doc = accum);
}
@Override
@ -360,9 +362,9 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
// manually inlined call to next() for speed
final int code = freqIn.readVInt();
if (omitTF) {
doc += code;
accum += code;
} else {
doc += code >>> 1; // shift off low bit
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
@ -370,8 +372,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
}
if (liveDocs == null || liveDocs.get(doc)) {
docs[i] = doc;
if (liveDocs == null || liveDocs.get(accum)) {
docs[i] = doc = accum;
freqs[i] = freq;
++i;
}
@ -422,7 +424,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
// Skipper moved
ord = newOrd;
doc = skipper.getDoc();
doc = accum = skipper.getDoc();
freqIn.seek(skipper.getFreqPointer());
}
}
@ -444,7 +446,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
int limit; // number of docs in this posting
int ord; // how many docs we've read
int doc; // doc we last read
int doc = -1; // doc we last read
int accum; // accumulator for doc deltas
int freq; // freq we last read
int position;
@ -482,7 +485,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
assert limit > 0;
ord = 0;
doc = 0;
doc = -1;
accum = 0;
position = 0;
skipped = false;
@ -510,7 +514,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
// Decode next doc/freq pair
final int code = freqIn.readVInt();
doc += code >>> 1; // shift off low bit
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
@ -518,7 +522,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
posPendingCount += freq;
if (liveDocs == null || liveDocs.get(doc)) {
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
@ -526,7 +530,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
position = 0;
// if (DEBUG) System.out.println(" return doc=" + doc);
return doc;
return (doc = accum);
}
@Override
@ -572,7 +576,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
if (newOrd > ord) {
// Skipper moved
ord = newOrd;
doc = skipper.getDoc();
doc = accum = skipper.getDoc();
freqIn.seek(skipper.getFreqPointer());
lazyProxPointer = skipper.getProxPointer();
posPendingCount = 0;
@ -636,7 +640,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
int limit; // number of docs in this posting
int ord; // how many docs we've read
int doc; // doc we last read
int doc = -1; // doc we last read
int accum; // accumulator for doc deltas
int freq; // freq we last read
int position;
@ -679,7 +684,8 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
limit = termState.docFreq;
ord = 0;
doc = 0;
doc = -1;
accum = 0;
position = 0;
skipped = false;
@ -707,7 +713,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
// Decode next doc/freq pair
final int code = freqIn.readVInt();
doc += code >>> 1; // shift off low bit
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
@ -715,7 +721,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
posPendingCount += freq;
if (liveDocs == null || liveDocs.get(doc)) {
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
@ -723,7 +729,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
position = 0;
//System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
return doc;
return (doc = accum);
}
@Override
@ -769,7 +775,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
if (newOrd > ord) {
// Skipper moved
ord = newOrd;
doc = skipper.getDoc();
doc = accum = skipper.getDoc();
freqIn.seek(skipper.getFreqPointer());
lazyProxPointer = skipper.getProxPointer();
posPendingCount = 0;

View File

@ -273,7 +273,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
private Bits liveDocs;
private int docUpto;
private int docID;
private int docID = -1;
private int accum;
private int freq;
private int payloadLen;
private int numDocs;
@ -295,7 +296,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
in.reset(buffer, 0, bufferIn.length - bufferIn.offset);
System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset);
this.liveDocs = liveDocs;
docID = 0;
docID = -1;
accum = 0;
docUpto = 0;
payloadLen = 0;
this.numDocs = numDocs;
@ -314,12 +316,12 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
docUpto++;
if (indexOptions == IndexOptions.DOCS_ONLY) {
docID += in.readVInt();
accum += in.readVInt();
freq = 1;
} else {
final int code = in.readVInt();
docID += code >>> 1;
if (VERBOSE) System.out.println(" docID=" + docID + " code=" + code);
accum += code >>> 1;
if (VERBOSE) System.out.println(" docID=" + accum + " code=" + code);
if ((code & 1) != 0) {
freq = 1;
} else {
@ -343,9 +345,9 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
}
if (liveDocs == null || liveDocs.get(docID)) {
if (VERBOSE) System.out.println(" return docID=" + docID + " freq=" + freq);
return docID;
if (liveDocs == null || liveDocs.get(accum)) {
if (VERBOSE) System.out.println(" return docID=" + accum + " freq=" + freq);
return (docID = accum);
}
}
}
@ -380,7 +382,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
private Bits liveDocs;
private int docUpto;
private int docID;
private int docID = -1;
private int accum;
private int freq;
private int numDocs;
private int posPending;
@ -412,7 +415,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
in.reset(buffer, 0, bufferIn.length - bufferIn.offset);
System.arraycopy(bufferIn.bytes, bufferIn.offset, buffer, 0, bufferIn.length - bufferIn.offset);
this.liveDocs = liveDocs;
docID = 0;
docID = -1;
accum = 0;
docUpto = 0;
payload.bytes = buffer;
payloadLength = 0;
@ -436,7 +440,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
docUpto++;
final int code = in.readVInt();
docID += code >>> 1;
accum += code >>> 1;
if ((code & 1) != 0) {
freq = 1;
} else {
@ -444,11 +448,11 @@ public class MemoryPostingsFormat extends PostingsFormat {
assert freq > 0;
}
if (liveDocs == null || liveDocs.get(docID)) {
if (liveDocs == null || liveDocs.get(accum)) {
pos = 0;
posPending = freq;
if (VERBOSE) System.out.println(" return docID=" + docID + " freq=" + freq);
return docID;
if (VERBOSE) System.out.println(" return docID=" + accum + " freq=" + freq);
return (docID = accum);
}
// Skip positions

View File

@ -257,7 +257,8 @@ public class PulsingPostingsReader extends PostingsReaderBase {
private final IndexOptions indexOptions;
private final boolean storePayloads;
private Bits liveDocs;
private int docID;
private int docID = -1;
private int accum;
private int freq;
private int payloadLength;
@ -279,7 +280,8 @@ public class PulsingPostingsReader extends PostingsReaderBase {
}
System.arraycopy(termState.postings, 0, postingsBytes, 0, termState.postingsSize);
postings.reset(postingsBytes, 0, termState.postingsSize);
docID = 0;
docID = -1;
accum = 0;
payloadLength = 0;
freq = 1;
this.liveDocs = liveDocs;
@ -302,9 +304,9 @@ public class PulsingPostingsReader extends PostingsReaderBase {
final int code = postings.readVInt();
//System.out.println(" read code=" + code);
if (indexOptions == IndexOptions.DOCS_ONLY) {
docID += code;
accum += code;
} else {
docID += code >>> 1; // shift off low bit
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
@ -332,8 +334,8 @@ public class PulsingPostingsReader extends PostingsReaderBase {
}
}
if (liveDocs == null || liveDocs.get(docID)) {
return docID;
if (liveDocs == null || liveDocs.get(accum)) {
return (docID = accum);
}
}
}
@ -365,7 +367,8 @@ public class PulsingPostingsReader extends PostingsReaderBase {
private final boolean storePayloads;
private Bits liveDocs;
private int docID;
private int docID = -1;
private int accum;
private int freq;
private int posPending;
private int position;
@ -394,7 +397,8 @@ public class PulsingPostingsReader extends PostingsReaderBase {
this.liveDocs = liveDocs;
payloadLength = 0;
posPending = 0;
docID = 0;
docID = -1;
accum = 0;
//System.out.println("PR d&p reset storesPayloads=" + storePayloads + " bytes=" + bytes.length + " this=" + this);
return this;
}
@ -414,7 +418,7 @@ public class PulsingPostingsReader extends PostingsReaderBase {
}
final int code = postings.readVInt();
docID += code >>> 1; // shift off low bit
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
@ -422,10 +426,10 @@ public class PulsingPostingsReader extends PostingsReaderBase {
}
posPending = freq;
if (liveDocs == null || liveDocs.get(docID)) {
if (liveDocs == null || liveDocs.get(accum)) {
//System.out.println(" return docID=" + docID + " freq=" + freq);
position = 0;
return docID;
return (docID = accum);
}
}
}

View File

@ -312,7 +312,8 @@ public class SepPostingsReader extends PostingsReaderBase {
class SepDocsEnum extends DocsEnum {
int docFreq;
int doc;
int doc = -1;
int accum;
int count;
int freq;
long freqStart;
@ -376,7 +377,8 @@ public class SepPostingsReader extends PostingsReaderBase {
// NOTE: unused if docFreq < skipMinimum:
skipFP = termState.skipFP;
count = 0;
doc = 0;
doc = -1;
accum = 0;
skipped = false;
return this;
@ -394,18 +396,18 @@ public class SepPostingsReader extends PostingsReaderBase {
// Decode next doc
//System.out.println("decode docDelta:");
doc += docReader.next();
accum += docReader.next();
if (!omitTF) {
//System.out.println("decode freq:");
freq = freqReader.next();
}
if (liveDocs == null || liveDocs.get(doc)) {
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
return doc;
return (doc = accum);
}
@Override
@ -420,14 +422,14 @@ public class SepPostingsReader extends PostingsReaderBase {
count++;
// manually inlined call to next() for speed
//System.out.println("decode doc");
doc += docReader.next();
accum += docReader.next();
if (!omitTF) {
//System.out.println("decode freq");
freq = freqReader.next();
}
if (liveDocs == null || liveDocs.get(doc)) {
docs[i] = doc;
if (liveDocs == null || liveDocs.get(accum)) {
docs[i] = doc = accum;
freqs[i] = freq;
//System.out.println(" docs[" + i + "]=" + doc + " count=" + count + " dF=" + docFreq);
i++;
@ -488,7 +490,7 @@ public class SepPostingsReader extends PostingsReaderBase {
}
skipper.getDocIndex().seek(docReader);
count = newCount;
doc = skipper.getDoc();
doc = accum = skipper.getDoc();
}
}
@ -505,7 +507,8 @@ public class SepPostingsReader extends PostingsReaderBase {
class SepDocsAndPositionsEnum extends DocsAndPositionsEnum {
int docFreq;
int doc;
int doc = -1;
int accum;
int count;
int freq;
long freqStart;
@ -572,7 +575,8 @@ public class SepPostingsReader extends PostingsReaderBase {
docFreq = termState.docFreq;
count = 0;
doc = 0;
doc = -1;
accum = 0;
pendingPosCount = 0;
pendingPayloadBytes = 0;
skipped = false;
@ -595,20 +599,20 @@ public class SepPostingsReader extends PostingsReaderBase {
// Decode next doc
//System.out.println(" sep d&p read doc");
doc += docReader.next();
accum += docReader.next();
//System.out.println(" sep d&p read freq");
freq = freqReader.next();
pendingPosCount += freq;
if (liveDocs == null || liveDocs.get(doc)) {
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
position = 0;
return doc;
return (doc = accum);
}
@Override
@ -668,7 +672,7 @@ public class SepPostingsReader extends PostingsReaderBase {
posIndex.set(skipper.getPosIndex());
posSeekPending = true;
count = newCount;
doc = skipper.getDoc();
doc = accum = skipper.getDoc();
//System.out.println(" moved to doc=" + doc);
//payloadIn.seek(skipper.getPayloadPointer());
payloadFP = skipper.getPayloadPointer();

View File

@ -314,7 +314,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
private class SimpleTextDocsAndPositionsEnum extends DocsAndPositionsEnum {
private final IndexInput inStart;
private final IndexInput in;
private int docID;
private int docID = -1;
private int tf;
private Bits liveDocs;
private final BytesRef scratch = new BytesRef(10);
@ -336,6 +336,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs) {
this.liveDocs = liveDocs;
nextDocStart = fp;
docID = -1;
return this;
}

View File

@ -22,10 +22,13 @@ import java.util.Arrays;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -332,4 +335,49 @@ public class TestDocsAndPositions extends LuceneTestCase {
dir.close();
}
public void testDocsEnumStart() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir);
Document doc = new Document();
doc.add(newField("foo", "bar", StringField.TYPE_UNSTORED));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexReader r = getOnlySegmentReader(reader);
DocsEnum disi = r.termDocsEnum(null, "foo", new BytesRef("bar"));
int docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
// now reuse and check again
disi = r.terms("foo").docs(null, new BytesRef("bar"), disi);
docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
writer.close();
r.close();
dir.close();
}
public void testDocsAndPositionsEnumStart() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir);
Document doc = new Document();
doc.add(newField("foo", "bar", TextField.TYPE_UNSTORED));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexReader r = getOnlySegmentReader(reader);
DocsAndPositionsEnum disi = r.termPositionsEnum(null, "foo", new BytesRef("bar"));
int docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
// now reuse and check again
disi = r.terms("foo").docsAndPositions(null, new BytesRef("bar"), disi);
docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
writer.close();
r.close();
dir.close();
}
}