mirror of
https://github.com/apache/lucene.git
synced 2025-03-02 14:29:23 +00:00
Sanity check offsets in CheckIndex
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1349349 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2397fb9d65
commit
4b212a7c4f
@ -681,6 +681,7 @@ public class CheckIndex {
|
|||||||
DocsEnum docs = null;
|
DocsEnum docs = null;
|
||||||
DocsEnum docsAndFreqs = null;
|
DocsEnum docsAndFreqs = null;
|
||||||
DocsAndPositionsEnum postings = null;
|
DocsAndPositionsEnum postings = null;
|
||||||
|
DocsAndPositionsEnum offsets = null;
|
||||||
|
|
||||||
String lastField = null;
|
String lastField = null;
|
||||||
final FieldsEnum fieldsEnum = fields.iterator();
|
final FieldsEnum fieldsEnum = fields.iterator();
|
||||||
@ -756,6 +757,7 @@ public class CheckIndex {
|
|||||||
docs = termsEnum.docs(liveDocs, docs, false);
|
docs = termsEnum.docs(liveDocs, docs, false);
|
||||||
docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true);
|
docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true);
|
||||||
postings = termsEnum.docsAndPositions(liveDocs, postings, false);
|
postings = termsEnum.docsAndPositions(liveDocs, postings, false);
|
||||||
|
offsets = termsEnum.docsAndPositions(liveDocs, offsets, true);
|
||||||
|
|
||||||
if (hasOrd) {
|
if (hasOrd) {
|
||||||
long ord = -1;
|
long ord = -1;
|
||||||
@ -779,19 +781,29 @@ public class CheckIndex {
|
|||||||
final DocsEnum docsAndFreqs2;
|
final DocsEnum docsAndFreqs2;
|
||||||
final boolean hasPositions;
|
final boolean hasPositions;
|
||||||
final boolean hasFreqs;
|
final boolean hasFreqs;
|
||||||
if (postings != null) {
|
final boolean hasOffsets;
|
||||||
|
if (offsets != null) {
|
||||||
|
docs2 = postings = offsets;
|
||||||
|
docsAndFreqs2 = postings = offsets;
|
||||||
|
hasOffsets = true;
|
||||||
|
hasPositions = true;
|
||||||
|
hasFreqs = true;
|
||||||
|
} else if (postings != null) {
|
||||||
docs2 = postings;
|
docs2 = postings;
|
||||||
docsAndFreqs2 = postings;
|
docsAndFreqs2 = postings;
|
||||||
|
hasOffsets = false;
|
||||||
hasPositions = true;
|
hasPositions = true;
|
||||||
hasFreqs = true;
|
hasFreqs = true;
|
||||||
} else if (docsAndFreqs != null) {
|
} else if (docsAndFreqs != null) {
|
||||||
docs2 = docsAndFreqs;
|
docs2 = docsAndFreqs;
|
||||||
docsAndFreqs2 = docsAndFreqs;
|
docsAndFreqs2 = docsAndFreqs;
|
||||||
|
hasOffsets = false;
|
||||||
hasPositions = false;
|
hasPositions = false;
|
||||||
hasFreqs = true;
|
hasFreqs = true;
|
||||||
} else {
|
} else {
|
||||||
docs2 = docs;
|
docs2 = docs;
|
||||||
docsAndFreqs2 = null;
|
docsAndFreqs2 = null;
|
||||||
|
hasOffsets = false;
|
||||||
hasPositions = false;
|
hasPositions = false;
|
||||||
hasFreqs = false;
|
hasFreqs = false;
|
||||||
}
|
}
|
||||||
@ -826,6 +838,7 @@ public class CheckIndex {
|
|||||||
lastDoc = doc;
|
lastDoc = doc;
|
||||||
|
|
||||||
int lastPos = -1;
|
int lastPos = -1;
|
||||||
|
int lastOffset = 0;
|
||||||
if (hasPositions) {
|
if (hasPositions) {
|
||||||
for(int j=0;j<freq;j++) {
|
for(int j=0;j<freq;j++) {
|
||||||
final int pos = postings.nextPosition();
|
final int pos = postings.nextPosition();
|
||||||
@ -846,6 +859,23 @@ public class CheckIndex {
|
|||||||
if (postings.hasPayload()) {
|
if (postings.hasPayload()) {
|
||||||
postings.getPayload();
|
postings.getPayload();
|
||||||
}
|
}
|
||||||
|
if (hasOffsets) {
|
||||||
|
int startOffset = postings.startOffset();
|
||||||
|
int endOffset = postings.endOffset();
|
||||||
|
if (startOffset < 0) {
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
|
||||||
|
}
|
||||||
|
if (startOffset < lastOffset) {
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
|
||||||
|
}
|
||||||
|
if (endOffset < 0) {
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
|
||||||
|
}
|
||||||
|
if (endOffset < startOffset) {
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
|
||||||
|
}
|
||||||
|
lastOffset = startOffset;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -892,7 +922,7 @@ public class CheckIndex {
|
|||||||
if (hasPositions) {
|
if (hasPositions) {
|
||||||
for(int idx=0;idx<7;idx++) {
|
for(int idx=0;idx<7;idx++) {
|
||||||
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
|
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
|
||||||
postings = termsEnum.docsAndPositions(liveDocs, postings, false);
|
postings = termsEnum.docsAndPositions(liveDocs, postings, hasOffsets);
|
||||||
final int docID = postings.advance(skipDocID);
|
final int docID = postings.advance(skipDocID);
|
||||||
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
break;
|
break;
|
||||||
@ -905,6 +935,7 @@ public class CheckIndex {
|
|||||||
throw new RuntimeException("termFreq " + freq + " is out of bounds");
|
throw new RuntimeException("termFreq " + freq + " is out of bounds");
|
||||||
}
|
}
|
||||||
int lastPosition = -1;
|
int lastPosition = -1;
|
||||||
|
int lastOffset = 0;
|
||||||
for(int posUpto=0;posUpto<freq;posUpto++) {
|
for(int posUpto=0;posUpto<freq;posUpto++) {
|
||||||
final int pos = postings.nextPosition();
|
final int pos = postings.nextPosition();
|
||||||
// NOTE: pos=-1 is allowed because of ancient bug
|
// NOTE: pos=-1 is allowed because of ancient bug
|
||||||
@ -921,6 +952,23 @@ public class CheckIndex {
|
|||||||
throw new RuntimeException("position " + pos + " is < lastPosition " + lastPosition);
|
throw new RuntimeException("position " + pos + " is < lastPosition " + lastPosition);
|
||||||
}
|
}
|
||||||
lastPosition = pos;
|
lastPosition = pos;
|
||||||
|
if (hasOffsets) {
|
||||||
|
int startOffset = postings.startOffset();
|
||||||
|
int endOffset = postings.endOffset();
|
||||||
|
if (startOffset < 0) {
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
|
||||||
|
}
|
||||||
|
if (startOffset < lastOffset) {
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
|
||||||
|
}
|
||||||
|
if (endOffset < 0) {
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
|
||||||
|
}
|
||||||
|
if (endOffset < startOffset) {
|
||||||
|
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
|
||||||
|
}
|
||||||
|
lastOffset = startOffset;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final int nextDocID = postings.nextDoc();
|
final int nextDocID = postings.nextDoc();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user