LUCENE-1168: fix corruption cases with mixed term vectors and autoCommit=false

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@619640 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2008-02-07 21:13:36 +00:00
parent fc4bc1f5af
commit 9716742942
5 changed files with 155 additions and 6 deletions

View File

@@ -44,6 +44,10 @@ Bug fixes
  2. LUCENE-1163: Fixed bug in CharArraySet.contains(char[] buffer, int
     offset, int len) that was ignoring offset and thus giving the
     wrong answer. (Thomas Peuss via Mike McCandless)
 
+ 3. LUCENE-1168: Fixed corruption cases when autoCommit=false and
+    documents have mixed term vectors (Suresh Guvvala via Mike
+    McCandless).
+
 New features
 
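Context for the entry above: with autoCommit=false, several flushed segments share a single doc store, and the corruption appears when some documents in that shared store have term vectors while earlier ones do not. A minimal sketch of the triggering pattern, mirroring the new tests at the bottom of this commit (assumes an already-open Directory named dir; 2.3-era API, field names illustrative):

    IndexWriter writer = new IndexWriter(dir, /*autoCommit=*/false,
                                         new StandardAnalyzer(),
                                         IndexWriter.MaxFieldLength.LIMITED);
    Document plain = new Document();
    plain.add(new Field("stored", "stored", Field.Store.YES, Field.Index.NO));
    writer.addDocument(plain);       // first docs carry no term vectors...
    writer.addDocument(plain);

    Document withVectors = new Document();
    withVectors.add(new Field("termVector", "termVector", Field.Store.NO,
                              Field.Index.UN_TOKENIZED,
                              Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(withVectors); // ...then one does, forcing the
    writer.close();                  // "catch up" path fixed below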

View File

@@ -876,10 +876,12 @@ final class DocumentsWriter {
         tvf = directory.createOutput(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
         tvf.writeInt(TermVectorsReader.FORMAT_VERSION2);
 
-        // We must "catch up" for all docIDs that had no
-        // vectors before this one
-        for(int i=0;i<docID;i++) {
-          tvx.writeLong(0);
+        // We must "catch up" for all docs before us
+        // that had no vectors:
+        final long tvdPos = tvd.getFilePointer();
+        tvd.writeVInt(0);
+        for(int i=0;i<numDocsInStore-1;i++) {
+          tvx.writeLong(tvdPos);
           tvx.writeLong(0);
         }
       } catch (Throwable t) {
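As I read the hunk: the vectors files are created lazily when the first document with vectors arrives, and tvx must then be backfilled with one entry per earlier vector-less document. The old code wrote docID pairs of zero longs, but docID is segment-relative (wrong count for a shared doc store), and offset 0 lands on the tvd format header rather than on a document record. The fix writes a single empty record into tvd (a VInt 0 field count), then backfills numDocsInStore-1 tvx entries pointing at it. A standalone sketch of the fixed backfill, not the actual DocumentsWriter code (file names, the literal format value 2, and writeByte standing in for writeVInt are all illustrative):

    import java.io.IOException;
    import java.io.RandomAccessFile;

    public class CatchUpSketch {
      public static void main(String[] args) throws IOException {
        final int numDocsInStore = 5;   // doc 4 is the first with vectors
        RandomAccessFile tvx = new RandomAccessFile("sketch.tvx", "rw");
        RandomAccessFile tvd = new RandomAccessFile("sketch.tvd", "rw");
        tvx.writeInt(2);                // format headers, as in the hunk above
        tvd.writeInt(2);                // (2 stands in for FORMAT_VERSION2)
        final long tvdPos = tvd.getFilePointer();  // == 4 here, not 0
        tvd.writeByte(0);               // one shared empty record: zero fields
        for (int i = 0; i < numDocsInStore - 1; i++) {
          tvx.writeLong(tvdPos);        // every earlier doc points at it
          tvx.writeLong(0);             // and has no tvf data
        }
        tvx.close();
        tvd.close();
      }
    }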

View File

@@ -201,7 +201,7 @@ final class FieldsReader {
     int count = 0;
     while (count < numDocs) {
       final long offset;
-      final int docID = startDocID + count + 1;
+      final int docID = docStoreOffset + startDocID + count + 1;
       assert docID <= numTotalDocs;
       if (docID < numTotalDocs)
         offset = indexStream.readLong();
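The reader-side fixes are pure index arithmetic: when a segment shares a doc store, its documents begin at docStoreOffset within the shared files, so the docID used to probe the shared index stream must include that offset before being compared against numTotalDocs (the document count of the whole store, not of the segment). A standalone sketch of that arithmetic, with an in-memory array standing in for the on-disk index stream (the method itself is hypothetical; the variable names mirror the hunk above):

    // Computes, for numDocs docs starting at startDocID within one segment,
    // where each doc's data ends in the shared store file.
    static long[] entryEnds(long[] pointers, long fileLength,
                            int docStoreOffset, int startDocID,
                            int numDocs, int numTotalDocs) {
      final long[] ends = new long[numDocs];
      int count = 0;
      while (count < numDocs) {
        // dropping docStoreOffset here reads the wrong entry whenever the
        // segment does not sit at the front of the shared doc store
        final int docID = docStoreOffset + startDocID + count + 1;
        assert docID <= numTotalDocs;
        ends[count] = docID < numTotalDocs ? pointers[docID] : fileLength;
        count++;
      }
      return ends;
    }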

View File

@@ -168,7 +168,8 @@ class TermVectorsReader implements Cloneable {
     int count = 0;
     while (count < numDocs) {
-      final int docID = startDocID + count + 1;
+      final int docID = docStoreOffset + startDocID + count + 1;
       assert docID <= numTotalDocs;
       if (docID < numTotalDocs) {
         tvdPosition = tvx.readLong();
         tvfPosition = tvx.readLong();
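TermVectorsReader gets the identical offset correction; the only structural difference is that its index stream holds two longs per document (a tvd pointer and a tvf pointer), hence the paired readLong calls in the context lines above.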

View File

@@ -2682,6 +2682,148 @@ public class TestIndexWriter extends LuceneTestCase
     dir.close();
   }
 
+  // LUCENE-1168
+  public void testTermVectorCorruption() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    for(int iter=0;iter<4;iter++) {
+      final boolean autoCommit = 1==iter/2;
+      IndexWriter writer = new IndexWriter(dir,
+                                           autoCommit, new StandardAnalyzer(),
+                                           IndexWriter.MaxFieldLength.LIMITED);
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Document document = new Document();
+      Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                    Field.Index.NO);
+      document.add(storedField);
+      writer.addDocument(document);
+      writer.addDocument(document);
+
+      document = new Document();
+      document.add(storedField);
+      Field termVectorField = new Field("termVector", "termVector",
+                                        Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                        Field.TermVector.WITH_POSITIONS_OFFSETS);
+      document.add(termVectorField);
+      writer.addDocument(document);
+      writer.optimize();
+      writer.close();
+
+      IndexReader reader = IndexReader.open(dir);
+      for(int i=0;i<reader.numDocs();i++) {
+        reader.document(i);
+        reader.getTermFreqVectors(i);
+      }
+      reader.close();
+
+      writer = new IndexWriter(dir,
+                               autoCommit, new StandardAnalyzer(),
+                               IndexWriter.MaxFieldLength.LIMITED);
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Directory[] indexDirs = {dir};
+      writer.addIndexes(indexDirs);
+      writer.close();
+    }
+    dir.close();
+  }
+
+  // LUCENE-1168
+  public void testTermVectorCorruption2() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    for(int iter=0;iter<4;iter++) {
+      final boolean autoCommit = 1==iter/2;
+      IndexWriter writer = new IndexWriter(dir,
+                                           autoCommit, new StandardAnalyzer(),
+                                           IndexWriter.MaxFieldLength.LIMITED);
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Document document = new Document();
+      Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                    Field.Index.NO);
+      document.add(storedField);
+      writer.addDocument(document);
+      writer.addDocument(document);
+
+      document = new Document();
+      document.add(storedField);
+      Field termVectorField = new Field("termVector", "termVector",
+                                        Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                        Field.TermVector.WITH_POSITIONS_OFFSETS);
+      document.add(termVectorField);
+      writer.addDocument(document);
+      writer.optimize();
+      writer.close();
+
+      IndexReader reader = IndexReader.open(dir);
+      assertTrue(reader.getTermFreqVectors(0)==null);
+      assertTrue(reader.getTermFreqVectors(1)==null);
+      assertTrue(reader.getTermFreqVectors(2)!=null);
+      reader.close();
+    }
+    dir.close();
+  }
+
+  // LUCENE-1168
+  public void testTermVectorCorruption3() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+                                         false, new StandardAnalyzer(),
+                                         IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(2);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+    writer.setMergeScheduler(new SerialMergeScheduler());
+    writer.setMergePolicy(new LogDocMergePolicy());
+
+    Document document = new Document();
+    Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                  Field.Index.NO);
+    document.add(storedField);
+    Field termVectorField = new Field("termVector", "termVector",
+                                      Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                      Field.TermVector.WITH_POSITIONS_OFFSETS);
+    document.add(termVectorField);
+    for(int i=0;i<10;i++)
+      writer.addDocument(document);
+    writer.close();
+
+    writer = new IndexWriter(dir,
+                             false, new StandardAnalyzer(),
+                             IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(2);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+    writer.setMergeScheduler(new SerialMergeScheduler());
+    writer.setMergePolicy(new LogDocMergePolicy());
+    for(int i=0;i<6;i++)
+      writer.addDocument(document);
+    writer.optimize();
+    writer.close();
+
+    IndexReader reader = IndexReader.open(dir);
+    for(int i=0;i<10;i++) {
+      reader.getTermFreqVectors(i);
+      reader.document(i);
+    }
+    reader.close();
+    dir.close();
+  }
+
   // LUCENE-1084: test user-specified field length
   public void testUserSpecifiedMaxFieldLength() throws IOException {
     Directory dir = new MockRAMDirectory();