mirror of https://github.com/apache/lucene.git
LUCENE-1168: fix corruption cases with mixed term vectors and autoCommit=false
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@619640 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent fc4bc1f5af
commit 9716742942
@@ -44,6 +44,10 @@ Bug fixes
  2. LUCENE-1163: Fixed bug in CharArraySet.contains(char[] buffer, int
     offset, int len) that was ignoring offset and thus giving the
     wrong answer.  (Thomas Peuss via Mike McCandless)
 
+ 3. LUCENE-1168: Fixed corruption cases when autoCommit=false and
+    documents have mixed term vectors (Suresh Guvvala via Mike
+    McCandless).
+
  New features
 
@@ -876,10 +876,12 @@ final class DocumentsWriter {
           tvf = directory.createOutput(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
           tvf.writeInt(TermVectorsReader.FORMAT_VERSION2);
 
-          // We must "catch up" for all docIDs that had no
-          // vectors before this one
-          for(int i=0;i<docID;i++) {
-            tvx.writeLong(0);
+          // We must "catch up" for all docs before us
+          // that had no vectors:
+          final long tvdPos = tvd.getFilePointer();
+          tvd.writeVInt(0);
+          for(int i=0;i<numDocsInStore-1;i++) {
+            tvx.writeLong(tvdPos);
             tvx.writeLong(0);
           }
         } catch (Throwable t) {
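The heart of this hunk is the "catch up" loop: when the first document that actually has term vectors is written, every earlier document in the same doc store still needs tvx entries, so the writer emits one empty tvd entry (a field count of 0) and points each earlier document's pair of tvx longs at it. The sketch below models that bookkeeping with plain java.io streams; the class name, method, and byte layout are simplified assumptions for illustration, not the real DocumentsWriter or term vectors file format.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Simplified model of the "catch up" step (illustration only, not Lucene code).
public class TermVectorCatchUpSketch {

  private final ByteArrayOutputStream tvxBytes = new ByteArrayOutputStream();
  private final ByteArrayOutputStream tvdBytes = new ByteArrayOutputStream();
  private final DataOutputStream tvx = new DataOutputStream(tvxBytes);
  private final DataOutputStream tvd = new DataOutputStream(tvdBytes);

  // Called once, when document number numDocsInStore-1 is the first document
  // in the doc store that has term vectors.
  void catchUp(int numDocsInStore) throws IOException {
    // One shared "no vectors" entry in tvd: a field count of 0.  The real code
    // uses tvd.writeVInt(0); a single zero byte stands in for that here.
    long tvdPos = tvdBytes.size();
    tvd.writeByte(0);

    // Each earlier document gets a tvx entry: a pointer to the shared tvd
    // entry plus a second long (the tvf pointer), written as 0 in the change.
    for (int i = 0; i < numDocsInStore - 1; i++) {
      tvx.writeLong(tvdPos);
      tvx.writeLong(0);
    }
  }

  public static void main(String[] args) throws IOException {
    TermVectorCatchUpSketch sketch = new TermVectorCatchUpSketch();
    sketch.catchUp(3);  // docs 0 and 1 had no vectors; doc 2 is the first with them
    System.out.println("tvx bytes: " + sketch.tvxBytes.size());  // 2 docs * 16 bytes = 32
  }
}

Run standalone this just prints 32; the point is that the backfilled entries keep tvx aligned at two longs per document, which is what the reader-side changes below rely on.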
@@ -201,7 +201,7 @@ final class FieldsReader {
     int count = 0;
     while (count < numDocs) {
       final long offset;
-      final int docID = startDocID + count + 1;
+      final int docID = docStoreOffset + startDocID + count + 1;
       assert docID <= numTotalDocs;
       if (docID < numTotalDocs)
         offset = indexStream.readLong();
@@ -168,7 +168,8 @@ class TermVectorsReader implements Cloneable {
 
     int count = 0;
     while (count < numDocs) {
-      final int docID = startDocID + count + 1;
+      final int docID = docStoreOffset + startDocID + count + 1;
+      assert docID <= numTotalDocs;
       if (docID < numTotalDocs) {
         tvdPosition = tvx.readLong();
         tvfPosition = tvx.readLong();
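Both reader-side hunks make the same one-line fix: with autoCommit=false several segments can share a single doc store, and a segment's documents begin at docStoreOffset within the shared files, so the check against numTotalDocs must use the absolute document id. A minimal sketch of the difference, with simplified names (not the actual FieldsReader/TermVectorsReader fields):

// Illustration only: shows why dropping docStoreOffset breaks the bounds check.
public class DocStoreOffsetSketch {

  // Returns true if another document's index entry follows in the shared store.
  static boolean nextEntryExists(int docStoreOffset, int startDocID, int count,
                                 int numTotalDocs) {
    final int docID = docStoreOffset + startDocID + count + 1;
    assert docID <= numTotalDocs;
    return docID < numTotalDocs;
  }

  public static void main(String[] args) {
    // A segment whose documents start at offset 4 of a shared store with 6 docs.
    // The old computation (offset ignored) claims another entry follows even at
    // the last document; with the offset applied the check is correct.
    System.out.println(nextEntryExists(0, 1, 0, 6));  // old behaviour: true
    System.out.println(nextEntryExists(4, 1, 0, 6));  // fixed behaviour: false
  }
}

The sketch keeps only the id arithmetic, not the readers' actual stream handling.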
@@ -2682,6 +2682,148 @@ public class TestIndexWriter extends LuceneTestCase
     dir.close();
   }
 
+  // LUCENE-1168
+  public void testTermVectorCorruption() throws IOException {
+
+    Directory dir = new MockRAMDirectory();
+    for(int iter=0;iter<4;iter++) {
+      final boolean autoCommit = 1==iter/2;
+      IndexWriter writer = new IndexWriter(dir,
+                                           autoCommit, new StandardAnalyzer(),
+                                           IndexWriter.MaxFieldLength.LIMITED);
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Document document = new Document();
+
+      Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                    Field.Index.NO);
+      document.add(storedField);
+      writer.addDocument(document);
+      writer.addDocument(document);
+
+      document = new Document();
+      document.add(storedField);
+      Field termVectorField = new Field("termVector", "termVector",
+                                        Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                        Field.TermVector.WITH_POSITIONS_OFFSETS);
+
+      document.add(termVectorField);
+      writer.addDocument(document);
+      writer.optimize();
+      writer.close();
+
+      IndexReader reader = IndexReader.open(dir);
+      for(int i=0;i<reader.numDocs();i++) {
+        reader.document(i);
+        reader.getTermFreqVectors(i);
+      }
+      reader.close();
+
+      writer = new IndexWriter(dir,
+                               autoCommit, new StandardAnalyzer(),
+                               IndexWriter.MaxFieldLength.LIMITED);
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Directory[] indexDirs = {dir};
+      writer.addIndexes(indexDirs);
+      writer.close();
+    }
+    dir.close();
+  }
+
+  // LUCENE-1168
+  public void testTermVectorCorruption2() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    for(int iter=0;iter<4;iter++) {
+      final boolean autoCommit = 1==iter/2;
+      IndexWriter writer = new IndexWriter(dir,
+                                           autoCommit, new StandardAnalyzer(),
+                                           IndexWriter.MaxFieldLength.LIMITED);
+      writer.setMaxBufferedDocs(2);
+      writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+      writer.setMergeScheduler(new SerialMergeScheduler());
+      writer.setMergePolicy(new LogDocMergePolicy());
+
+      Document document = new Document();
+
+      Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                    Field.Index.NO);
+      document.add(storedField);
+      writer.addDocument(document);
+      writer.addDocument(document);
+
+      document = new Document();
+      document.add(storedField);
+      Field termVectorField = new Field("termVector", "termVector",
+                                        Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                        Field.TermVector.WITH_POSITIONS_OFFSETS);
+      document.add(termVectorField);
+      writer.addDocument(document);
+      writer.optimize();
+      writer.close();
+
+      IndexReader reader = IndexReader.open(dir);
+      assertTrue(reader.getTermFreqVectors(0)==null);
+      assertTrue(reader.getTermFreqVectors(1)==null);
+      assertTrue(reader.getTermFreqVectors(2)!=null);
+      reader.close();
+    }
+    dir.close();
+  }
+
+  // LUCENE-1168
+  public void testTermVectorCorruption3() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+                                         false, new StandardAnalyzer(),
+                                         IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(2);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+    writer.setMergeScheduler(new SerialMergeScheduler());
+    writer.setMergePolicy(new LogDocMergePolicy());
+
+    Document document = new Document();
+
+    document = new Document();
+    Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                  Field.Index.NO);
+    document.add(storedField);
+    Field termVectorField = new Field("termVector", "termVector",
+                                      Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                      Field.TermVector.WITH_POSITIONS_OFFSETS);
+    document.add(termVectorField);
+    for(int i=0;i<10;i++)
+      writer.addDocument(document);
+    writer.close();
+
+    writer = new IndexWriter(dir,
+                             false, new StandardAnalyzer(),
+                             IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(2);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+    writer.setMergeScheduler(new SerialMergeScheduler());
+    writer.setMergePolicy(new LogDocMergePolicy());
+    for(int i=0;i<6;i++)
+      writer.addDocument(document);
+
+    writer.optimize();
+    writer.close();
+
+    IndexReader reader = IndexReader.open(dir);
+    for(int i=0;i<10;i++) {
+      reader.getTermFreqVectors(i);
+      reader.document(i);
+    }
+    reader.close();
+    dir.close();
+  }
+
   // LUCENE-1084: test user-specified field length
   public void testUserSpecifiedMaxFieldLength() throws IOException {
     Directory dir = new MockRAMDirectory();