LUCENE-4629: IndexWriter fails to delete documents if Iterator<IndexDocument> throws an exception

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1421798 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2012-12-14 10:54:47 +00:00
parent 48b1d3a2aa
commit 5ed3c4654c
3 changed files with 110 additions and 13 deletions

lucene/CHANGES.txt

@@ -284,6 +284,10 @@ Bug Fixes
   parts of the shape's boundary erroneously too.  So center points aren't
   indexed any more; you should use another spatial field.  (David Smiley)

* LUCENE-4629: IndexWriter fails to delete documents if a document block is
  indexed and the Iterator throws an exception.  Documents were only rolled
  back if the actual indexing process failed.  (Simon Willnauer)

Changes in Runtime Behavior

* LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.
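
For context, a minimal sketch of the failure mode this entry describes (illustration only, not part of the commit; the class name and exception message are made up): an Iterable passed to IndexWriter.addDocuments() whose iterator throws *between* documents. At that moment no individual document is being indexed, so the old per-document rollback in DocumentsWriterPerThread never ran, and the documents already consumed from the block stayed live.

    // Illustration only (not part of this commit): an Iterable whose
    // iterator fails between documents, as seen by addDocuments().
    import java.util.Iterator;
    import java.util.List;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexDocument;

    class FailAfterFirstIterable implements Iterable<IndexDocument> {
      private final List<Document> docs;

      FailAfterFirstIterable(List<Document> docs) {
        this.docs = docs;
      }

      @Override
      public Iterator<IndexDocument> iterator() {
        final Iterator<Document> it = docs.iterator();
        return new Iterator<IndexDocument>() {
          private int served = 0;

          @Override
          public boolean hasNext() {
            return it.hasNext();
          }

          @Override
          public IndexDocument next() {
            if (served++ == 1) {
              // Fails after the first document was already indexed; no
              // document is mid-indexing here, which is exactly the case
              // the old rollback did not cover.
              throw new RuntimeException("iterator boom");
            }
            return it.next();
          }

          @Override
          public void remove() {
            throw new UnsupportedOperationException();
          }
        };
      }
    }

Calling w.addDocuments(new FailAfterFirstIterable(docs)) throws after the first document of the block was added; with this fix that document is marked deleted during rollback instead of becoming visible.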

lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java

@@ -296,7 +296,9 @@ class DocumentsWriterPerThread {
      infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
    }
    int docCount = 0;
    boolean allDocsIndexed = false;
    try {
      for(IndexDocument doc : docs) {
        docState.doc = doc;
        docState.docID = numDocsInRAM;
@@ -309,20 +311,7 @@ class DocumentsWriterPerThread {
        } finally {
          if (!success) {
            // An exc is being thrown...
            if (!aborting) {
              // One of the documents hit a non-aborting
              // exception (eg something happened during
              // analysis).  We now go and mark any docs
              // from this batch that we had already indexed
              // as deleted:
              int docID = docState.docID;
              final int endDocID = docID - docCount;
              while (docID > endDocID) {
                deleteDocID(docID);
                docID--;
              }
            }
            // Incr here because finishDocument will not
            // be called (because an exc is being thrown):
            numDocsInRAM++;
@@ -343,6 +332,7 @@ class DocumentsWriterPerThread {
        finishDocument(null);
      }
      allDocsIndexed = true;

      // Apply delTerm only after all indexing has
      // succeeded, but apply it only to docs prior to when
@@ -354,6 +344,16 @@ class DocumentsWriterPerThread {
      }
    } finally {
      if (!allDocsIndexed && !aborting) {
        // the iterator threw an exception that is not aborting
        // go and mark all docs from this block as deleted
        int docID = numDocsInRAM-1;
        final int endDocID = docID - docCount;
        while (docID > endDocID) {
          deleteDocID(docID);
          docID--;
        }
      }
      docState.clear();
    }
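
The arithmetic of this new rollback loop is easy to check in isolation. A standalone sketch (the class, the main method, and the println stub are local to this illustration; inside DocumentsWriterPerThread, deleteDocID() buffers the delete): with numDocsInRAM = 10 after three documents of the failed block were indexed (docCount = 3), the loop marks docIDs 9, 8 and 7 as deleted.

    // Standalone illustration of the rollback loop's arithmetic.
    class RollbackArithmetic {
      static void rollbackBlock(int numDocsInRAM, int docCount) {
        int docID = numDocsInRAM - 1;           // highest docID used by the block
        final int endDocID = docID - docCount;  // last docID *before* the block
        while (docID > endDocID) {
          deleteDocID(docID);
          docID--;
        }
      }

      static void deleteDocID(int docID) {
        // Stub: the real method buffers the docID as deleted.
        System.out.println("delete docID " + docID);
      }

      public static void main(String[] args) {
        rollbackBlock(10, 3);  // prints: delete docID 9, 8, 7
      }
    }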

lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java

@@ -24,8 +24,11 @@ import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -56,7 +59,9 @@ import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util._TestUtil;
@@ -1995,4 +2000,92 @@ public class TestIndexWriter extends LuceneTestCase {
    dir.close();
  }

  public void testIterableThrowsException() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    int iters = atLeast(100);
    int docCount = 0;
    int docId = 0;
    Set<String> liveIds = new HashSet<String>();
    for (int i = 0; i < iters; i++) {
      List<Document> docs = new ArrayList<Document>();
      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
      FieldType idFt = new FieldType(TextField.TYPE_STORED);
      int numDocs = atLeast(4);
      for (int j = 0; j < numDocs; j++) {
        Document doc = new Document();
        doc.add(newField("id", "" + (docId++), idFt));
        doc.add(newField("foo", _TestUtil.randomSimpleString(random()), ft));
        docs.add(doc);
      }
      boolean success = false;
      try {
        w.addDocuments(new RandomFailingFieldIterable(docs, random()));
        success = true;
      } catch (RuntimeException e) {
        assertEquals("boom", e.getMessage());
      } finally {
        if (success) {
          docCount += docs.size();
          for (Document indexDocument : docs) {
            liveIds.add(indexDocument.get("id"));
          }
        }
      }
    }
    DirectoryReader reader = w.getReader();
    assertEquals(docCount, reader.numDocs());
    List<AtomicReaderContext> leaves = reader.leaves();
    for (AtomicReaderContext atomicReaderContext : leaves) {
      AtomicReader ar = atomicReaderContext.reader();
      Bits liveDocs = ar.getLiveDocs();
      int maxDoc = ar.maxDoc();
      for (int i = 0; i < maxDoc; i++) {
        // getLiveDocs() returns null for a segment without deletions
        if (liveDocs == null || liveDocs.get(i)) {
          assertTrue(liveIds.remove(ar.document(i).get("id")));
        }
      }
    }
    assertTrue(liveIds.isEmpty());
    IOUtils.close(reader, w, dir);
  }

  private static class RandomFailingFieldIterable implements Iterable<IndexDocument> {
    private final List<? extends IndexDocument> docList;
    private final Random random;

    public RandomFailingFieldIterable(List<? extends IndexDocument> docList, Random random) {
      this.docList = docList;
      this.random = random;
    }

    @Override
    public Iterator<IndexDocument> iterator() {
      final Iterator<? extends IndexDocument> docIter = docList.iterator();
      return new Iterator<IndexDocument>() {

        @Override
        public boolean hasNext() {
          return docIter.hasNext();
        }

        @Override
        public IndexDocument next() {
          if (random.nextInt(5) == 0) {
            throw new RuntimeException("boom");
          }
          return docIter.next();
        }

        @Override
        public void remove() {
          throw new UnsupportedOperationException();
        }
      };
    }
  }
}