mirror of https://github.com/apache/lucene.git
LUCENE-1176: fix corruption case when adding docs with no term vectors followed by docs with term vectors
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@627122 13f79535-47bb-0310-9956-ffa450edef68
commit ec18d3ad7c
parent 527957bc94
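The corruption happens when a shared doc store segment accumulates documents without term vectors and a later document adds them: DocumentsWriter must then "catch up" the .tvx/.tvd term vector files for every earlier document, and the old catch-up logic wrote the wrong entries. A minimal sketch of the triggering sequence, assuming the 2.4-dev API this commit targets (the class name ReproSketch is illustrative, not part of the commit):

// Illustrative repro (hedged): first doc has no term vectors, a later one does.
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class ReproSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, true, new WhitespaceAnalyzer(),
                                    true, IndexWriter.MaxFieldLength.UNLIMITED);

    Document noVectors = new Document();
    noVectors.add(new Field("f", "doc without vectors",
        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    w.addDocument(noVectors);

    Document withVectors = new Document();
    withVectors.add(new Field("f", "doc with vectors",
        Field.Store.YES, Field.Index.TOKENIZED,
        Field.TermVector.WITH_POSITIONS_OFFSETS));
    // Adding this doc forces the writer to backfill .tvx/.tvd entries for the
    // earlier vector-less doc; before this fix the backfill could corrupt them.
    w.addDocument(withVectors);

    w.close();
  }
}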
src/java/org/apache/lucene/index/DocumentsWriter.java

@@ -258,7 +258,7 @@ final class DocumentsWriter {
       List flushedFiles = files();
 
       if (infoStream != null)
-        infoStream.println("\ncloseDocStore: " + flushedFiles.size() + " files to flush to segment " + docStoreSegment);
+        infoStream.println("\ncloseDocStore: " + flushedFiles.size() + " files to flush to segment " + docStoreSegment + " numDocs=" + numDocsInStore);
 
       if (flushedFiles.size() > 0) {
         files = null;

@@ -665,6 +665,8 @@ final class DocumentsWriter {
       // it means those files are possibly inconsistent.
       try {
 
+        numDocsInStore++;
+
         // Append stored fields to the real FieldsWriter:
         fieldsWriter.flushDocument(numStoredFields, fdtLocal);
         fdtLocal.reset();

@@ -888,10 +890,9 @@ final class DocumentsWriter {
 
           // We must "catch up" for all docs before us
           // that had no vectors:
-          final long tvdPos = tvd.getFilePointer();
-          tvd.writeVInt(0);
-          for(int i=0;i<numDocsInStore-1;i++) {
-            tvx.writeLong(tvdPos);
+          for(int i=0;i<numDocsInStore;i++) {
+            tvx.writeLong(tvd.getFilePointer());
+            tvd.writeVInt(0);
+            tvx.writeLong(0);
           }
 
         } catch (Throwable t) {
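For context (an interpretation, not part of the commit): the removed code precomputed one tvd position, wrote a single empty entry to .tvd, and pointed numDocsInStore-1 .tvx longs at it; the replacement writes a full per-document record for each of the numDocsInStore preceding documents. The fixed loop, annotated:

for(int i=0;i<numDocsInStore;i++) {
  tvx.writeLong(tvd.getFilePointer()); // per-doc pointer into .tvd
  tvd.writeVInt(0);                    // .tvd entry: zero fields with vectors
  tvx.writeLong(0);                    // second per-doc long in .tvx (no vector data)
}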
@@ -2370,7 +2371,6 @@ final class DocumentsWriter {
     segment = writer.newSegmentName();
 
     numDocsInRAM++;
-    numDocsInStore++;
 
     // We must at this point commit to flushing to ensure we
     // always get N docs when we flush by doc count, even if
src/java/org/apache/lucene/index/TermVectorsReader.java

@@ -96,7 +96,7 @@ class TermVectorsReader implements Cloneable {
         this.size = size;
         // Verify the file is long enough to hold all of our
         // docs
-        assert numTotalDocs >= size + docStoreOffset;
+        assert numTotalDocs >= size + docStoreOffset: "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset;
       }
     } else
       format = 0;
src/test/org/apache/lucene/index/TestStressIndexing2.java

@@ -21,6 +21,8 @@ import org.apache.lucene.analysis.*;
 import org.apache.lucene.util.LuceneTestCase;
 
 import java.util.*;
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
 import java.io.IOException;
 
 import junit.framework.TestCase;

@@ -38,9 +40,9 @@ public class TestStressIndexing2 extends LuceneTestCase {
 
 
   public void testRandom() throws Exception {
-    Directory dir1 = new RAMDirectory();
+    Directory dir1 = new MockRAMDirectory();
     // dir1 = FSDirectory.getDirectory("foofoofoo");
-    Directory dir2 = new RAMDirectory();
+    Directory dir2 = new MockRAMDirectory();
     // mergeFactor=2; maxBufferedDocs=2; Map docs = indexRandom(1, 3, 2, dir1);
     Map docs = indexRandom(10, 100, 100, dir1);
     indexSerial(docs, dir2);
@@ -52,6 +54,16 @@ public class TestStressIndexing2 extends LuceneTestCase {
     verifyEquals(dir1, dir2, "id");
   }
 
+  private void checkIndex(Directory dir) throws IOException {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+    CheckIndex.out = new PrintStream(bos);
+    if (!CheckIndex.check(dir, false, null)) {
+      System.out.println("CheckIndex failed");
+      System.out.println(bos.toString());
+      fail("CheckIndex failed");
+    }
+  }
+
   public void testMultiConfig() throws Exception {
     // test lots of smaller different params together
     for (int i=0; i<100; i++) { // increase iterations for better testing
@@ -65,8 +77,8 @@ public class TestStressIndexing2 extends LuceneTestCase {
       int iter=r.nextInt(10)+1;
       int range=r.nextInt(20)+1;
 
-      Directory dir1 = new RAMDirectory();
-      Directory dir2 = new RAMDirectory();
+      Directory dir1 = new MockRAMDirectory();
+      Directory dir2 = new MockRAMDirectory();
       Map docs = indexRandom(nThreads, iter, range, dir1);
       indexSerial(docs, dir2);
       verifyEquals(dir1, dir2, "id");
@@ -87,7 +99,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
   // everything.
 
   public Map indexRandom(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException {
-    IndexWriter w = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
+    IndexWriter w = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
     w.setUseCompoundFile(false);
     /***
     w.setMaxMergeDocs(Integer.MAX_VALUE);
@@ -129,6 +141,8 @@ public class TestStressIndexing2 extends LuceneTestCase {
       }
     }
 
+    checkIndex(dir);
+
     return docs;
   }
 
@@ -195,7 +209,27 @@ public class TestStressIndexing2 extends LuceneTestCase {
         r2r1[id2] = id1;
 
         // verify stored fields are equivalent
-        verifyEquals(r1.document(id1), r2.document(id2));
+        verifyEquals(r1.document(id1), r2.document(id2));
+
+        try {
+          // verify term vectors are equivalent
+          verifyEquals(r1.getTermFreqVectors(id1), r2.getTermFreqVectors(id2));
+        } catch (java.lang.Error e) {
+          System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
+          TermFreqVector[] tv1 = r1.getTermFreqVectors(id1);
+          System.out.println("  d1=" + tv1);
+          if (tv1 != null)
+            for(int i=0;i<tv1.length;i++)
+              System.out.println("    " + i + ": " + tv1[i]);
+
+          TermFreqVector[] tv2 = r2.getTermFreqVectors(id2);
+          System.out.println("  d2=" + tv2);
+          if (tv2 != null)
+            for(int i=0;i<tv2.length;i++)
+              System.out.println("    " + i + ": " + tv2[i]);
+
+          throw e;
+        }
 
       } while (termEnum.next());
 
@@ -300,6 +334,55 @@ public class TestStressIndexing2 extends LuceneTestCase {
     }
   }
 
+  public static void verifyEquals(TermFreqVector[] d1, TermFreqVector[] d2) {
+    if (d1 == null) {
+      assertTrue(d2 == null);
+      return;
+    }
+    assertTrue(d2 != null);
+
+    assertEquals(d1.length, d2.length);
+    for(int i=0;i<d1.length;i++) {
+      TermFreqVector v1 = d1[i];
+      TermFreqVector v2 = d2[i];
+      assertEquals(v1.size(), v2.size());
+      int numTerms = v1.size();
+      String[] terms1 = v1.getTerms();
+      String[] terms2 = v2.getTerms();
+      int[] freq1 = v1.getTermFrequencies();
+      int[] freq2 = v2.getTermFrequencies();
+      for(int j=0;j<numTerms;j++) {
+        if (!terms1[j].equals(terms2[j]))
+          assertEquals(terms1[j], terms2[j]);
+        assertEquals(freq1[j], freq2[j]);
+      }
+      if (v1 instanceof TermPositionVector) {
+        assertTrue(v2 instanceof TermPositionVector);
+        TermPositionVector tpv1 = (TermPositionVector) v1;
+        TermPositionVector tpv2 = (TermPositionVector) v2;
+        for(int j=0;j<numTerms;j++) {
+          int[] pos1 = tpv1.getTermPositions(j);
+          int[] pos2 = tpv2.getTermPositions(j);
+          assertEquals(pos1.length, pos2.length);
+          TermVectorOffsetInfo[] offsets1 = tpv1.getOffsets(j);
+          TermVectorOffsetInfo[] offsets2 = tpv2.getOffsets(j);
+          if (offsets1 == null)
+            assertTrue(offsets2 == null);
+          else
+            assertTrue(offsets2 != null);
+          for(int k=0;k<pos1.length;k++) {
+            assertEquals(pos1[k], pos2[k]);
+            if (offsets1 != null) {
+              assertEquals(offsets1[k].getStartOffset(),
+                           offsets2[k].getStartOffset());
+              assertEquals(offsets1[k].getEndOffset(),
+                           offsets2[k].getEndOffset());
+            }
+          }
+        }
+      }
+    }
+  }
+
   private static class IndexingThread extends Thread {
     IndexWriter w;
 
@@ -336,18 +419,35 @@ public class TestStressIndexing2 extends LuceneTestCase {
 
       int nFields = nextInt(maxFields);
       for (int i=0; i<nFields; i++) {
 
+        Field.TermVector tvVal = Field.TermVector.NO;
+        switch (nextInt(4)) {
+        case 0:
+          tvVal = Field.TermVector.NO;
+          break;
+        case 1:
+          tvVal = Field.TermVector.YES;
+          break;
+        case 2:
+          tvVal = Field.TermVector.WITH_POSITIONS;
+          break;
+        case 3:
+          tvVal = Field.TermVector.WITH_POSITIONS_OFFSETS;
+          break;
+        }
+
         switch (nextInt(4)) {
         case 0:
-          fields.add(new Field("f0", getString(1), Field.Store.YES, Field.Index.NO_NORMS));
+          fields.add(new Field("f0", getString(1), Field.Store.YES, Field.Index.NO_NORMS, tvVal));
           break;
         case 1:
-          fields.add(new Field("f1", getString(0), Field.Store.NO, Field.Index.TOKENIZED));
+          fields.add(new Field("f1", getString(0), Field.Store.NO, Field.Index.TOKENIZED, tvVal));
           break;
         case 2:
-          fields.add(new Field("f2", getString(0), Field.Store.YES, Field.Index.NO));
+          fields.add(new Field("f2", getString(0), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
           break;
         case 3:
-          fields.add(new Field("f3", getString(bigFieldSize), Field.Store.YES, Field.Index.TOKENIZED));
+          fields.add(new Field("f3", getString(bigFieldSize), Field.Store.YES, Field.Index.TOKENIZED, tvVal));
           break;
         }
       }
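The new checkIndex helper in the test doubles as a recipe for verifying an existing index against this corruption. A standalone sketch using the same CheckIndex calls that appear in the diff (the class name VerifyIndex and the FSDirectory usage are illustrative assumptions):

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class VerifyIndex {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.getDirectory(args[0]);  // path to the index
    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    CheckIndex.out = new PrintStream(bos);              // capture diagnostics, as the test does
    if (!CheckIndex.check(dir, false, null)) {          // same call the test uses
      System.out.println("CheckIndex failed");
      System.out.println(bos.toString());               // dump the captured report
    }
    dir.close();
  }
}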