sharpen CheckIndex to test .nextPosition() after skipping; fix bug in PulsingCodec's reuse of Docs/AndPositionsEnum with payloads; don't write redundant payload lengths

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1090024 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2011-04-07 21:26:32 +00:00
parent 106f2eedf3
commit 11a6e0de70
5 changed files with 75 additions and 23 deletions

View File

@ -661,10 +661,13 @@ public class CheckIndex {
status.termCount++;
final DocsEnum docs2;
final boolean hasPositions;
if (postings != null) {
docs2 = postings;
hasPositions = true;
} else {
docs2 = docs;
hasPositions = false;
}
int lastDoc = -1;
@ -736,22 +739,60 @@ public class CheckIndex {
// Test skipping
if (docFreq >= 16) {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
docs = terms.docs(delDocs, docs);
final int docID = docs.advance(skipDocID);
if (docID == DocsEnum.NO_MORE_DOCS) {
break;
} else {
if (docID < skipDocID) {
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
}
final int nextDocID = docs.nextDoc();
if (nextDocID == DocsEnum.NO_MORE_DOCS) {
if (hasPositions) {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
postings = terms.docsAndPositions(delDocs, postings);
final int docID = postings.advance(skipDocID);
if (docID == DocsEnum.NO_MORE_DOCS) {
break;
} else {
if (docID < skipDocID) {
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
}
final int freq = postings.freq();
if (freq <= 0) {
throw new RuntimeException("termFreq " + freq + " is out of bounds");
}
int lastPosition = -1;
for(int posUpto=0;posUpto<freq;posUpto++) {
final int pos = postings.nextPosition();
if (pos < 0) {
throw new RuntimeException("position " + pos + " is out of bounds");
}
if (pos <= lastPosition) {
throw new RuntimeException("position " + pos + " is <= lastPosition " + lastPosition);
}
lastPosition = pos;
}
final int nextDocID = postings.nextDoc();
if (nextDocID == DocsEnum.NO_MORE_DOCS) {
break;
}
if (nextDocID <= docID) {
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
}
}
if (nextDocID <= docID) {
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
}
} else {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
docs = terms.docs(delDocs, docs);
final int docID = docs.advance(skipDocID);
if (docID == DocsEnum.NO_MORE_DOCS) {
break;
} else {
if (docID < skipDocID) {
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
}
final int nextDocID = docs.nextDoc();
if (nextDocID == DocsEnum.NO_MORE_DOCS) {
break;
}
if (nextDocID <= docID) {
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
}
}
}
}

View File

@ -233,6 +233,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
private Bits skipDocs;
private int docID;
private int freq;
private int payloadLength;
public PulsingDocsEnum(FieldInfo fieldInfo) {
omitTF = fieldInfo.omitTermFreqAndPositions;
@ -246,6 +247,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
System.arraycopy(termState.postings, 0, bytes, 0, termState.postingsSize);
postings.reset(bytes);
docID = 0;
payloadLength = 0;
freq = 1;
this.skipDocs = skipDocs;
return this;
@ -277,7 +279,6 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
// Skip positions
if (storePayloads) {
int payloadLength = -1;
for(int pos=0;pos<freq;pos++) {
final int posCode = postings.readVInt();
if ((posCode & 1) != 0) {
@ -352,6 +353,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
postings.reset(bytes);
this.skipDocs = skipDocs;
payloadLength = 0;
posPending = 0;
docID = 0;
//System.out.println("PR d&p reset storesPayloads=" + storePayloads + " bytes=" + bytes.length + " this=" + this);
return this;
@ -359,7 +361,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
@Override
public int nextDoc() throws IOException {
//System.out.println("PR d&p nextDoc this=" + this);
//System.out.println("PR.nextDoc this=" + this);
while(true) {
//System.out.println(" cycle skip posPending=" + posPending);
@ -367,15 +369,16 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
skipPositions();
if (postings.eof()) {
//System.out.println("PR END");
//System.out.println(" END");
return docID = NO_MORE_DOCS;
}
//System.out.println(" read doc code");
final int code = postings.readVInt();
docID += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
//System.out.println(" read freq");
freq = postings.readVInt(); // else read freq
}
posPending = freq;
@ -400,10 +403,12 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
@Override
public int advance(int target) throws IOException {
//System.out.println("PR.advance target=" + target);
int doc;
while((doc=nextDoc()) != NO_MORE_DOCS) {
//System.out.println(" nextDoc got doc=" + doc);
if (doc >= target) {
return doc;
return docID = doc;
}
}
return docID = NO_MORE_DOCS;
@ -411,7 +416,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
@Override
public int nextPosition() throws IOException {
//System.out.println("PR d&p nextPosition posPending=" + posPending + " vs freq=" + freq);
//System.out.println("PR.nextPosition posPending=" + posPending + " vs freq=" + freq);
assert posPending > 0;
posPending--;
@ -421,6 +426,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
//System.out.println("PR skip payload=" + payloadLength);
postings.skipBytes(payloadLength);
}
//System.out.println(" read pos code");
final int code = postings.readVInt();
//System.out.println("PR code=" + code);
if ((code & 1) != 0) {
@ -433,16 +439,17 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
position += postings.readVInt();
}
//System.out.println("PR d&p nextPos return pos=" + position + " this=" + this);
//System.out.println(" return pos=" + position + " hasPayload=" + !payloadRetrieved + " posPending=" + posPending + " this=" + this);
return position;
}
private void skipPositions() throws IOException {
//System.out.println("PR.skipPositions: posPending=" + posPending);
while(posPending != 0) {
nextPosition();
}
if (storePayloads && !payloadRetrieved) {
//System.out.println(" skip payload len=" + payloadLength);
//System.out.println(" skip last payload len=" + payloadLength);
postings.skipBytes(payloadLength);
payloadRetrieved = true;
}

View File

@ -201,6 +201,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
if (!omitTF) {
int lastDocID = 0;
int pendingIDX = 0;
int lastPayloadLength = -1;
while(pendingIDX < pendingCount) {
final Position doc = pending[pendingIDX];
@ -217,7 +218,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
}
int lastPos = 0;
int lastPayloadLength = -1;
for(int posIDX=0;posIDX<doc.termFreq;posIDX++) {
final Position pos = pending[pendingIDX++];
assert pos.docID == doc.docID;

View File

@ -421,6 +421,9 @@ public class MockDirectoryWrapper extends Directory {
}
open = false;
if (checkIndexOnClose) {
if (LuceneTestCase.VERBOSE) {
System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
}
if (codecProvider != null) {
if (IndexReader.indexExists(this, codecProvider)) {
_TestUtil.checkIndex(this, codecProvider);

View File

@ -1128,6 +1128,7 @@ public class TestIndexWriter extends LuceneTestCase {
lmp.setMaxMergeDocs(20);
lmp.setMergeFactor(2);
IndexWriter iw = new IndexWriter(dir, conf);
iw.setInfoStream(VERBOSE ? System.out : null);
Document document = new Document();
document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
Field.TermVector.YES));