mirror of https://github.com/apache/lucene.git
LUCENE-4221: CheckIndex is overeager for term vector offsets bounds checks
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1361701 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
75b6bf69bd
commit
383d17e6a5
|
@ -48,6 +48,9 @@ Bug Fixes
|
|||
leave temp files behind in /tmp on Windows. Fix Sort to not leave
|
||||
temp files behind when /tmp is a separate volume. (Uwe Schindler, Robert Muir)
|
||||
|
||||
* LUCENE-4221: Fix overeager CheckIndex validation for term vector offsets.
|
||||
(Robert Muir)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-4094: Support overriding file.encoding on forked test JVMs
|
||||
|
|
|
@ -668,7 +668,7 @@ public class CheckIndex {
|
|||
* checks Fields api is consistent with itself.
|
||||
* searcher is optional, to verify with queries. Can be null.
|
||||
*/
|
||||
private Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, boolean doPrint) throws IOException {
|
||||
private Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, boolean doPrint, boolean isVectors) throws IOException {
|
||||
// TODO: we should probably return our own stats thing...?!
|
||||
|
||||
final Status.TermIndexStatus status = new Status.TermIndexStatus();
|
||||
|
@ -863,17 +863,21 @@ public class CheckIndex {
|
|||
if (hasOffsets) {
|
||||
int startOffset = postings.startOffset();
|
||||
int endOffset = postings.endOffset();
|
||||
if (startOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
|
||||
}
|
||||
if (startOffset < lastOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
|
||||
}
|
||||
if (endOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
|
||||
}
|
||||
if (endOffset < startOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
|
||||
// NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before?
|
||||
// but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter
|
||||
if (!isVectors) {
|
||||
if (startOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
|
||||
}
|
||||
if (startOffset < lastOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
|
||||
}
|
||||
if (endOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
|
||||
}
|
||||
if (endOffset < startOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
|
||||
}
|
||||
}
|
||||
lastOffset = startOffset;
|
||||
}
|
||||
|
@ -956,17 +960,21 @@ public class CheckIndex {
|
|||
if (hasOffsets) {
|
||||
int startOffset = postings.startOffset();
|
||||
int endOffset = postings.endOffset();
|
||||
if (startOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
|
||||
}
|
||||
if (startOffset < lastOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
|
||||
}
|
||||
if (endOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
|
||||
}
|
||||
if (endOffset < startOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
|
||||
// NOTE: we cannot enforce any bounds whatsoever on vectors... they were a free-for-all before?
|
||||
// but for offsets in the postings lists these checks are fine: they were always enforced by IndexWriter
|
||||
if (!isVectors) {
|
||||
if (startOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " is out of bounds");
|
||||
}
|
||||
if (startOffset < lastOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": startOffset " + startOffset + " < lastStartOffset " + lastOffset);
|
||||
}
|
||||
if (endOffset < 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " is out of bounds");
|
||||
}
|
||||
if (endOffset < startOffset) {
|
||||
throw new RuntimeException("term " + term + ": doc " + docID + ": pos " + pos + ": endOffset " + endOffset + " < startOffset " + startOffset);
|
||||
}
|
||||
}
|
||||
lastOffset = startOffset;
|
||||
}
|
||||
|
@ -1193,12 +1201,12 @@ public class CheckIndex {
|
|||
}
|
||||
|
||||
final Fields fields = reader.fields();
|
||||
status = checkFields(fields, liveDocs, maxDoc, fieldInfos, true);
|
||||
status = checkFields(fields, liveDocs, maxDoc, fieldInfos, true, false);
|
||||
if (liveDocs != null) {
|
||||
if (infoStream != null) {
|
||||
infoStream.print(" test (ignoring deletes): terms, freq, prox...");
|
||||
}
|
||||
checkFields(fields, null, maxDoc, fieldInfos, true);
|
||||
checkFields(fields, null, maxDoc, fieldInfos, true, false);
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
msg("ERROR: " + e);
|
||||
|
@ -1415,10 +1423,10 @@ public class CheckIndex {
|
|||
|
||||
if (tfv != null) {
|
||||
// First run with no deletions:
|
||||
checkFields(tfv, null, 1, fieldInfos, false);
|
||||
checkFields(tfv, null, 1, fieldInfos, false, true);
|
||||
|
||||
// Again, with the one doc deleted:
|
||||
checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false);
|
||||
checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true);
|
||||
|
||||
// Only agg stats if the doc is live:
|
||||
final boolean doStats = liveDocs == null || liveDocs.get(j);
|
||||
|
|
|
@ -25,8 +25,11 @@ import java.util.ArrayList;
|
|||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.analysis.CannedTokenStream;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.util.Constants;
|
||||
|
@ -93,6 +96,24 @@ public class TestCheckIndex extends LuceneTestCase {
|
|||
assertTrue(checker.checkIndex(onlySegments).clean == true);
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// LUCENE-4221: CheckIndex has to let these bogus term vector offsets through, for now
|
||||
/**
 * Indexes a single document whose term-vector field is fed two canned tokens
 * with out-of-order offsets, then closes the writer and the directory.
 * The test contains no explicit assertions; it passes as long as nothing throws.
 */
public void testBogusTermVectors() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
// null is passed in the second argument slot (presumably the analyzer -- confirm);
// the field supplies its own token stream below.
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
|
||||
Document doc = new Document();
|
||||
// Start from the not-stored text field type and enable term vectors plus their offsets.
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
ft.setStoreTermVectors(true);
|
||||
ft.setStoreTermVectorOffsets(true);
|
||||
// The string value is empty; the tokens come from the canned stream set below.
Field field = new Field("foo", "", ft);
|
||||
field.setTokenStream(new CannedTokenStream(
|
||||
// Two "bar" tokens; presumably the ints are (startOffset, endOffset), so the
// second token (1, 4) starts before the first (5, 10) -- confirm against Token.
new Token("bar", 5, 10), new Token("bar", 1, 4)
|
||||
));
|
||||
doc.add(field);
|
||||
iw.addDocument(doc);
|
||||
iw.close();
|
||||
dir.close(); // checkindex -- presumably the test directory runs CheckIndex on close; confirm
|
||||
}
|
||||
|
||||
public void testLuceneConstantVersion() throws IOException {
|
||||
// common-build.xml sets lucene.version
|
||||
|
|
Loading…
Reference in New Issue