mirror of https://github.com/apache/lucene.git
Sharpen CheckIndex to test .nextPosition() after skipping (advance); fix a bug in PulsingCodec's reuse of DocsEnum/DocsAndPositionsEnum when payloads are stored; don't write redundant payload lengths
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1090024 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
106f2eedf3
commit
11a6e0de70
|
@ -661,10 +661,13 @@ public class CheckIndex {
|
|||
status.termCount++;
|
||||
|
||||
final DocsEnum docs2;
|
||||
final boolean hasPositions;
|
||||
if (postings != null) {
|
||||
docs2 = postings;
|
||||
hasPositions = true;
|
||||
} else {
|
||||
docs2 = docs;
|
||||
hasPositions = false;
|
||||
}
|
||||
|
||||
int lastDoc = -1;
|
||||
|
@ -736,22 +739,60 @@ public class CheckIndex {
|
|||
|
||||
// Test skipping
|
||||
if (docFreq >= 16) {
|
||||
for(int idx=0;idx<7;idx++) {
|
||||
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
|
||||
docs = terms.docs(delDocs, docs);
|
||||
final int docID = docs.advance(skipDocID);
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
break;
|
||||
} else {
|
||||
if (docID < skipDocID) {
|
||||
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
|
||||
}
|
||||
final int nextDocID = docs.nextDoc();
|
||||
if (nextDocID == DocsEnum.NO_MORE_DOCS) {
|
||||
if (hasPositions) {
|
||||
for(int idx=0;idx<7;idx++) {
|
||||
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
|
||||
postings = terms.docsAndPositions(delDocs, postings);
|
||||
final int docID = postings.advance(skipDocID);
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
break;
|
||||
} else {
|
||||
if (docID < skipDocID) {
|
||||
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
|
||||
}
|
||||
final int freq = postings.freq();
|
||||
if (freq <= 0) {
|
||||
throw new RuntimeException("termFreq " + freq + " is out of bounds");
|
||||
}
|
||||
int lastPosition = -1;
|
||||
for(int posUpto=0;posUpto<freq;posUpto++) {
|
||||
final int pos = postings.nextPosition();
|
||||
if (pos < 0) {
|
||||
throw new RuntimeException("position " + pos + " is out of bounds");
|
||||
}
|
||||
if (pos <= lastPosition) {
|
||||
throw new RuntimeException("position " + pos + " is <= lastPosition " + lastPosition);
|
||||
}
|
||||
lastPosition = pos;
|
||||
}
|
||||
|
||||
final int nextDocID = postings.nextDoc();
|
||||
if (nextDocID == DocsEnum.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
if (nextDocID <= docID) {
|
||||
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
|
||||
}
|
||||
}
|
||||
if (nextDocID <= docID) {
|
||||
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
|
||||
}
|
||||
} else {
|
||||
for(int idx=0;idx<7;idx++) {
|
||||
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
|
||||
docs = terms.docs(delDocs, docs);
|
||||
final int docID = docs.advance(skipDocID);
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
break;
|
||||
} else {
|
||||
if (docID < skipDocID) {
|
||||
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
|
||||
}
|
||||
final int nextDocID = docs.nextDoc();
|
||||
if (nextDocID == DocsEnum.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
if (nextDocID <= docID) {
|
||||
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -233,6 +233,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
private Bits skipDocs;
|
||||
private int docID;
|
||||
private int freq;
|
||||
private int payloadLength;
|
||||
|
||||
public PulsingDocsEnum(FieldInfo fieldInfo) {
|
||||
omitTF = fieldInfo.omitTermFreqAndPositions;
|
||||
|
@ -246,6 +247,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
System.arraycopy(termState.postings, 0, bytes, 0, termState.postingsSize);
|
||||
postings.reset(bytes);
|
||||
docID = 0;
|
||||
payloadLength = 0;
|
||||
freq = 1;
|
||||
this.skipDocs = skipDocs;
|
||||
return this;
|
||||
|
@ -277,7 +279,6 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
// Skip positions
|
||||
if (storePayloads) {
|
||||
int payloadLength = -1;
|
||||
for(int pos=0;pos<freq;pos++) {
|
||||
final int posCode = postings.readVInt();
|
||||
if ((posCode & 1) != 0) {
|
||||
|
@ -352,6 +353,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
postings.reset(bytes);
|
||||
this.skipDocs = skipDocs;
|
||||
payloadLength = 0;
|
||||
posPending = 0;
|
||||
docID = 0;
|
||||
//System.out.println("PR d&p reset storesPayloads=" + storePayloads + " bytes=" + bytes.length + " this=" + this);
|
||||
return this;
|
||||
|
@ -359,7 +361,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
//System.out.println("PR d&p nextDoc this=" + this);
|
||||
//System.out.println("PR.nextDoc this=" + this);
|
||||
|
||||
while(true) {
|
||||
//System.out.println(" cycle skip posPending=" + posPending);
|
||||
|
@ -367,15 +369,16 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
skipPositions();
|
||||
|
||||
if (postings.eof()) {
|
||||
//System.out.println("PR END");
|
||||
//System.out.println(" END");
|
||||
return docID = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
//System.out.println(" read doc code");
|
||||
final int code = postings.readVInt();
|
||||
docID += code >>> 1; // shift off low bit
|
||||
if ((code & 1) != 0) { // if low bit is set
|
||||
freq = 1; // freq is one
|
||||
} else {
|
||||
//System.out.println(" read freq");
|
||||
freq = postings.readVInt(); // else read freq
|
||||
}
|
||||
posPending = freq;
|
||||
|
@ -400,10 +403,12 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
//System.out.println("PR.advance target=" + target);
|
||||
int doc;
|
||||
while((doc=nextDoc()) != NO_MORE_DOCS) {
|
||||
//System.out.println(" nextDoc got doc=" + doc);
|
||||
if (doc >= target) {
|
||||
return doc;
|
||||
return docID = doc;
|
||||
}
|
||||
}
|
||||
return docID = NO_MORE_DOCS;
|
||||
|
@ -411,7 +416,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int nextPosition() throws IOException {
|
||||
//System.out.println("PR d&p nextPosition posPending=" + posPending + " vs freq=" + freq);
|
||||
//System.out.println("PR.nextPosition posPending=" + posPending + " vs freq=" + freq);
|
||||
|
||||
assert posPending > 0;
|
||||
posPending--;
|
||||
|
@ -421,6 +426,7 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
//System.out.println("PR skip payload=" + payloadLength);
|
||||
postings.skipBytes(payloadLength);
|
||||
}
|
||||
//System.out.println(" read pos code");
|
||||
final int code = postings.readVInt();
|
||||
//System.out.println("PR code=" + code);
|
||||
if ((code & 1) != 0) {
|
||||
|
@ -433,16 +439,17 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
position += postings.readVInt();
|
||||
}
|
||||
|
||||
//System.out.println("PR d&p nextPos return pos=" + position + " this=" + this);
|
||||
//System.out.println(" return pos=" + position + " hasPayload=" + !payloadRetrieved + " posPending=" + posPending + " this=" + this);
|
||||
return position;
|
||||
}
|
||||
|
||||
private void skipPositions() throws IOException {
|
||||
//System.out.println("PR.skipPositions: posPending=" + posPending);
|
||||
while(posPending != 0) {
|
||||
nextPosition();
|
||||
}
|
||||
if (storePayloads && !payloadRetrieved) {
|
||||
//System.out.println(" skip payload len=" + payloadLength);
|
||||
//System.out.println(" skip last payload len=" + payloadLength);
|
||||
postings.skipBytes(payloadLength);
|
||||
payloadRetrieved = true;
|
||||
}
|
||||
|
|
|
@ -201,6 +201,7 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
if (!omitTF) {
|
||||
int lastDocID = 0;
|
||||
int pendingIDX = 0;
|
||||
int lastPayloadLength = -1;
|
||||
while(pendingIDX < pendingCount) {
|
||||
final Position doc = pending[pendingIDX];
|
||||
|
||||
|
@ -217,7 +218,6 @@ public final class PulsingPostingsWriterImpl extends PostingsWriterBase {
|
|||
}
|
||||
|
||||
int lastPos = 0;
|
||||
int lastPayloadLength = -1;
|
||||
for(int posIDX=0;posIDX<doc.termFreq;posIDX++) {
|
||||
final Position pos = pending[pendingIDX++];
|
||||
assert pos.docID == doc.docID;
|
||||
|
|
|
@ -421,6 +421,9 @@ public class MockDirectoryWrapper extends Directory {
|
|||
}
|
||||
open = false;
|
||||
if (checkIndexOnClose) {
|
||||
if (LuceneTestCase.VERBOSE) {
|
||||
System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
|
||||
}
|
||||
if (codecProvider != null) {
|
||||
if (IndexReader.indexExists(this, codecProvider)) {
|
||||
_TestUtil.checkIndex(this, codecProvider);
|
||||
|
|
|
@ -1128,6 +1128,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
lmp.setMaxMergeDocs(20);
|
||||
lmp.setMergeFactor(2);
|
||||
IndexWriter iw = new IndexWriter(dir, conf);
|
||||
iw.setInfoStream(VERBOSE ? System.out : null);
|
||||
Document document = new Document();
|
||||
document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED,
|
||||
Field.TermVector.YES));
|
||||
|
|
Loading…
Reference in New Issue