Mirror of https://github.com/apache/lucene.git

Commit ec18d3ad7c (parent 527957bc94)

LUCENE-1176: fix corruption case when adding docs with no term vectors followed by docs with term vectors

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@627122 13f79535-47bb-0310-9956-ffa450edef68
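The bug: within one segment's shared doc store, documents with no term vectors can be followed by a document that does have them. At that point DocumentsWriter must backfill empty term-vector entries for all the earlier documents, and the old backfill logic left the vector index and data files (tvx/tvd) inconsistent. A minimal sketch of an indexing sequence that reaches this path (my illustration, not part of the commit; the class name is made up and the constructor choice assumes the 2.3-era API):

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.RAMDirectory;

    public class VectorMixRepro {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

        // First, documents with no term vectors...
        for (int i = 0; i < 3; i++) {
          Document d = new Document();
          d.add(new Field("f", "no vectors here", Field.Store.YES,
                          Field.Index.TOKENIZED, Field.TermVector.NO));
          w.addDocument(d);
        }

        // ...then one with vectors: the writer must now "catch up" by
        // writing empty vector entries for the three documents above.
        Document d = new Document();
        d.add(new Field("f", "now with vectors", Field.Store.YES,
                        Field.Index.TOKENIZED,
                        Field.TermVector.WITH_POSITIONS_OFFSETS));
        w.addDocument(d);

        w.close();
      }
    }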
DocumentsWriter.java

@@ -258,7 +258,7 @@ final class DocumentsWriter {
       List flushedFiles = files();
 
       if (infoStream != null)
-        infoStream.println("\ncloseDocStore: " + flushedFiles.size() + " files to flush to segment " + docStoreSegment);
+        infoStream.println("\ncloseDocStore: " + flushedFiles.size() + " files to flush to segment " + docStoreSegment + " numDocs=" + numDocsInStore);
 
       if (flushedFiles.size() > 0) {
         files = null;
@@ -665,6 +665,8 @@ final class DocumentsWriter {
       // it means those files are possibly inconsistent.
       try {
 
+        numDocsInStore++;
+
         // Append stored fields to the real FieldsWriter:
         fieldsWriter.flushDocument(numStoredFields, fdtLocal);
         fdtLocal.reset();
@@ -888,10 +890,9 @@ final class DocumentsWriter {
 
         // We must "catch up" for all docs before us
         // that had no vectors:
-        for(int i=0;i<numDocsInStore;i++) {
-          tvx.writeLong(tvd.getFilePointer());
-          tvd.writeVInt(0);
-          tvx.writeLong(0);
-        }
+        final long tvdPos = tvd.getFilePointer();
+        tvd.writeVInt(0);
+        for(int i=0;i<numDocsInStore-1;i++) {
+          tvx.writeLong(tvdPos);
+          tvx.writeLong(0);
+        }
       } catch (Throwable t) {
 
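This is the heart of the fix. The old loop iterated numDocsInStore times, a count that includes the document currently being added, and appended a separate empty tvd record on every pass. The new code writes one shared empty tvd record, remembers its position in tvdPos, and adds index entries only for the numDocsInStore-1 documents that actually precede the current one, so the vector index stays in step with the vector data.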
@@ -2370,7 +2371,6 @@ final class DocumentsWriter {
       segment = writer.newSegmentName();
 
     numDocsInRAM++;
-    numDocsInStore++;
 
     // We must at this point commit to flushing to ensure we
     // always get N docs when we flush by doc count, even if
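Together with the hunk at line 665 above, this moves the numDocsInStore increment from document-add time to the point where the document's stored fields are actually flushed to the doc store, so the counter can no longer run ahead of what is on disk.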
TermVectorsReader.java

@@ -96,7 +96,7 @@ class TermVectorsReader implements Cloneable {
       this.size = size;
       // Verify the file is long enough to hold all of our
       // docs
-      assert numTotalDocs >= size + docStoreOffset;
+      assert numTotalDocs >= size + docStoreOffset: "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset;
       }
     } else
       format = 0;
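The assertion now reports all three values when it trips, so a too-short tvx file, the read-side symptom of this corruption, identifies itself instead of failing bare.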
TestStressIndexing2.java

@@ -21,6 +21,8 @@ import org.apache.lucene.analysis.*;
 import org.apache.lucene.util.LuceneTestCase;
 
 import java.util.*;
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
 import java.io.IOException;
 
 import junit.framework.TestCase;
@@ -38,9 +40,9 @@ public class TestStressIndexing2 extends LuceneTestCase {
 
 
   public void testRandom() throws Exception {
-    Directory dir1 = new RAMDirectory();
+    Directory dir1 = new MockRAMDirectory();
     // dir1 = FSDirectory.getDirectory("foofoofoo");
-    Directory dir2 = new RAMDirectory();
+    Directory dir2 = new MockRAMDirectory();
     // mergeFactor=2; maxBufferedDocs=2; Map docs = indexRandom(1, 3, 2, dir1);
     Map docs = indexRandom(10, 100, 100, dir1);
     indexSerial(docs, dir2);
@@ -52,6 +54,16 @@ public class TestStressIndexing2 extends LuceneTestCase {
     verifyEquals(dir1, dir2, "id");
   }
 
+  private void checkIndex(Directory dir) throws IOException {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+    CheckIndex.out = new PrintStream(bos);
+    if (!CheckIndex.check(dir, false, null)) {
+      System.out.println("CheckIndex failed");
+      System.out.println(bos.toString());
+      fail("CheckIndex failed");
+    }
+  }
+
   public void testMultiConfig() throws Exception {
     // test lots of smaller different params together
     for (int i=0; i<100; i++) { // increase iterations for better testing
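The new checkIndex() helper captures CheckIndex's report in a buffer and prints it only on failure; indexRandom() below calls it on every index the stress test builds, so structural corruption now fails the test even when searches against the two indexes still happen to agree.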
@@ -65,8 +77,8 @@ public class TestStressIndexing2 extends LuceneTestCase {
       int iter=r.nextInt(10)+1;
       int range=r.nextInt(20)+1;
 
-      Directory dir1 = new RAMDirectory();
-      Directory dir2 = new RAMDirectory();
+      Directory dir1 = new MockRAMDirectory();
+      Directory dir2 = new MockRAMDirectory();
       Map docs = indexRandom(nThreads, iter, range, dir1);
       indexSerial(docs, dir2);
       verifyEquals(dir1, dir2, "id");
@@ -87,7 +99,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
   // everything.
 
   public Map indexRandom(int nThreads, int iterations, int range, Directory dir) throws IOException, InterruptedException {
-    IndexWriter w = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
+    IndexWriter w = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
     w.setUseCompoundFile(false);
     /***
     w.setMaxMergeDocs(Integer.MAX_VALUE);
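The test now passes an explicit IndexWriter.MaxFieldLength.UNLIMITED instead of taking the old four-argument constructor's default limit of 10,000 indexed terms per field, so the large random fields are indexed in full.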
@@ -129,6 +141,8 @@ public class TestStressIndexing2 extends LuceneTestCase {
       }
     }
 
+    checkIndex(dir);
+
     return docs;
   }
 
@@ -197,6 +211,26 @@ public class TestStressIndexing2 extends LuceneTestCase {
       // verify stored fields are equivalent
       verifyEquals(r1.document(id1), r2.document(id2));
 
+      try {
+        // verify term vectors are equivalent
+        verifyEquals(r1.getTermFreqVectors(id1), r2.getTermFreqVectors(id2));
+      } catch (java.lang.Error e) {
+        System.out.println("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
+        TermFreqVector[] tv1 = r1.getTermFreqVectors(id1);
+        System.out.println("  d1=" + tv1);
+        if (tv1 != null)
+          for(int i=0;i<tv1.length;i++)
+            System.out.println("    " + i + ": " + tv1[i]);
+
+        TermFreqVector[] tv2 = r2.getTermFreqVectors(id2);
+        System.out.println("  d2=" + tv2);
+        if (tv2 != null)
+          for(int i=0;i<tv2.length;i++)
+            System.out.println("    " + i + ": " + tv2[i]);
+
+        throw e;
+      }
+
     } while (termEnum.next());
 
     termEnum.close();
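When the vector comparison fails, the catch block prints the failing term plus both documents' full vectors before rethrowing, so a randomized stress failure leaves enough evidence to diagnose.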
@@ -300,6 +334,55 @@ public class TestStressIndexing2 extends LuceneTestCase {
     }
   }
 
+  public static void verifyEquals(TermFreqVector[] d1, TermFreqVector[] d2) {
+    if (d1 == null) {
+      assertTrue(d2 == null);
+      return;
+    }
+    assertTrue(d2 != null);
+
+    assertEquals(d1.length, d2.length);
+    for(int i=0;i<d1.length;i++) {
+      TermFreqVector v1 = d1[i];
+      TermFreqVector v2 = d2[i];
+      assertEquals(v1.size(), v2.size());
+      int numTerms = v1.size();
+      String[] terms1 = v1.getTerms();
+      String[] terms2 = v2.getTerms();
+      int[] freq1 = v1.getTermFrequencies();
+      int[] freq2 = v2.getTermFrequencies();
+      for(int j=0;j<numTerms;j++) {
+        if (!terms1[j].equals(terms2[j]))
+          assertEquals(terms1[j], terms2[j]);
+        assertEquals(freq1[j], freq2[j]);
+      }
+      if (v1 instanceof TermPositionVector) {
+        assertTrue(v2 instanceof TermPositionVector);
+        TermPositionVector tpv1 = (TermPositionVector) v1;
+        TermPositionVector tpv2 = (TermPositionVector) v2;
+        for(int j=0;j<numTerms;j++) {
+          int[] pos1 = tpv1.getTermPositions(j);
+          int[] pos2 = tpv2.getTermPositions(j);
+          assertEquals(pos1.length, pos2.length);
+          TermVectorOffsetInfo[] offsets1 = tpv1.getOffsets(j);
+          TermVectorOffsetInfo[] offsets2 = tpv2.getOffsets(j);
+          if (offsets1 == null)
+            assertTrue(offsets2 == null);
+          else
+            assertTrue(offsets2 != null);
+          for(int k=0;k<pos1.length;k++) {
+            assertEquals(pos1[k], pos2[k]);
+            if (offsets1 != null) {
+              assertEquals(offsets1[k].getStartOffset(),
+                           offsets2[k].getStartOffset());
+              assertEquals(offsets1[k].getEndOffset(),
+                           offsets2[k].getEndOffset());
+            }
+          }
+        }
+      }
+    }
+  }
+
   private static class IndexingThread extends Thread {
     IndexWriter w;
 
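The new verifyEquals overload compares vectors field by field: term text and frequencies always, positions and offsets whenever the vectors are TermPositionVectors, and it treats null offsets as acceptable only when both sides agree.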
@@ -336,18 +419,35 @@ public class TestStressIndexing2 extends LuceneTestCase {
 
       int nFields = nextInt(maxFields);
       for (int i=0; i<nFields; i++) {
+
+        Field.TermVector tvVal = Field.TermVector.NO;
         switch (nextInt(4)) {
         case 0:
-          fields.add(new Field("f0", getString(1), Field.Store.YES, Field.Index.NO_NORMS));
+          tvVal = Field.TermVector.NO;
           break;
         case 1:
-          fields.add(new Field("f1", getString(0), Field.Store.NO, Field.Index.TOKENIZED));
+          tvVal = Field.TermVector.YES;
           break;
         case 2:
-          fields.add(new Field("f2", getString(0), Field.Store.YES, Field.Index.NO));
+          tvVal = Field.TermVector.WITH_POSITIONS;
           break;
         case 3:
-          fields.add(new Field("f3", getString(bigFieldSize), Field.Store.YES, Field.Index.TOKENIZED));
+          tvVal = Field.TermVector.WITH_POSITIONS_OFFSETS;
+          break;
+        }
+
+        switch (nextInt(4)) {
+        case 0:
+          fields.add(new Field("f0", getString(1), Field.Store.YES, Field.Index.NO_NORMS, tvVal));
+          break;
+        case 1:
+          fields.add(new Field("f1", getString(0), Field.Store.NO, Field.Index.TOKENIZED, tvVal));
+          break;
+        case 2:
+          fields.add(new Field("f2", getString(0), Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
+          break;
+        case 3:
+          fields.add(new Field("f3", getString(bigFieldSize), Field.Store.YES, Field.Index.TOKENIZED, tvVal));
           break;
         }
       }
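Previously each case hard-wired one field with no term-vector setting at all. Choosing the vector mode in its own switch lets any field carry any mode (except the unindexed f2, which must stay at Field.TermVector.NO), so the random documents now freely mix fields with and without vectors, exactly the pattern this commit fixes.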