LUCENE-4629: IndexWriter fails to delete documents if Iterator<IndexDocument> throws an exception

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1421798 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2012-12-14 10:54:47 +00:00
parent 48b1d3a2aa
commit 5ed3c4654c
3 changed files with 110 additions and 13 deletions
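
The failure mode in brief: IndexWriter.addDocuments(Iterable) treats a block of documents as atomic, but the rollback that marks partially indexed documents as deleted only ran when indexing a document itself failed. If the caller-supplied Iterator threw between documents, the documents already consumed stayed live. A minimal sketch of the fixed semantics, with hypothetical `writer` and `docs` (the test added in this commit exercises the same pattern):

    // A block whose iterator fails partway through; the documents it already
    // returned have been indexed by the time the exception surfaces.
    Iterable<IndexDocument> block = new Iterable<IndexDocument>() {
      @Override
      public Iterator<IndexDocument> iterator() {
        final Iterator<? extends IndexDocument> it = docs.iterator();
        return new Iterator<IndexDocument>() {
          int returned = 0;
          @Override public boolean hasNext() { return it.hasNext(); }
          @Override public IndexDocument next() {
            if (returned++ == 2) { // fail after two documents were handed out
              throw new RuntimeException("boom");
            }
            return it.next();
          }
          @Override public void remove() { throw new UnsupportedOperationException(); }
        };
      }
    };
    try {
      writer.addDocuments(block);
    } catch (RuntimeException e) {
      // With this fix, the two already-indexed documents are marked deleted and
      // the block is all-or-nothing; before it, they remained live in the index.
    }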

lucene/CHANGES.txt

@@ -284,6 +284,10 @@ Bug Fixes
   parts of the shape's boundary erroneously too. So center points aren't
   indexed any more; you should use another spatial field. (David Smiley)
 
+* LUCENE-4629: IndexWriter fails to delete documents if a document block is
+  indexed and the Iterator throws an exception. Documents were only rolled
+  back if the actual indexing process failed. (Simon Willnauer)
+
 Changes in Runtime Behavior
 
 * LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.

lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java

@@ -296,7 +296,9 @@ class DocumentsWriterPerThread {
       infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
     }
     int docCount = 0;
+    boolean allDocsIndexed = false;
     try {
       for(IndexDocument doc : docs) {
         docState.doc = doc;
         docState.docID = numDocsInRAM;
@@ -309,20 +311,7 @@ class DocumentsWriterPerThread {
         } finally {
           if (!success) {
             // An exc is being thrown...
             if (!aborting) {
-              // One of the documents hit a non-aborting
-              // exception (eg something happened during
-              // analysis). We now go and mark any docs
-              // from this batch that we had already indexed
-              // as deleted:
-              int docID = docState.docID;
-              final int endDocID = docID - docCount;
-              while (docID > endDocID) {
-                deleteDocID(docID);
-                docID--;
-              }
-
               // Incr here because finishDocument will not
               // be called (because an exc is being thrown):
               numDocsInRAM++;
@@ -343,6 +332,7 @@ class DocumentsWriterPerThread {
         finishDocument(null);
       }
+      allDocsIndexed = true;
 
       // Apply delTerm only after all indexing has
       // succeeded, but apply it only to docs prior to when
@@ -354,6 +344,16 @@ class DocumentsWriterPerThread {
       }
     } finally {
+      if (!allDocsIndexed && !aborting) {
+        // the iterator threw an exception that is not aborting
+        // go and mark all docs from this block as deleted
+        int docID = numDocsInRAM-1;
+        final int endDocID = docID - docCount;
+        while (docID > endDocID) {
+          deleteDocID(docID);
+          docID--;
+        }
+      }
       docState.clear();
     }
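
Why the rollback moved: the old loop lived in the per-document finally and was guarded by !success, so it ran only when indexing a particular document failed; an exception thrown by the iterator itself, between documents, bypassed it and left the partial block live. The outer finally with the allDocsIndexed flag now covers both cases. A standalone sketch of the deletion-range arithmetic, with made-up numbers:

    // Suppose the in-RAM segment held 10 documents when the block started and
    // the iterator threw after 3 documents of the block had been indexed.
    int numDocsInRAM = 13;                 // 10 pre-existing + 3 from the failed block
    int docCount = 3;                      // documents indexed before the failure
    int docID = numDocsInRAM - 1;          // 12: the newest document of the block
    final int endDocID = docID - docCount; // 9: the last document before the block
    while (docID > endDocID) {
      System.out.println("deleteDocID(" + docID + ")"); // marks 12, 11, 10 deleted
      docID--;
    }
    // Documents 0..9 are untouched; only the failed block is rolled back.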

lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java

@@ -24,8 +24,11 @@ import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Random;
+import java.util.Set;
 
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -56,7 +59,9 @@ import org.apache.lucene.store.NoLockFactory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.SimpleFSLockFactory;
 import org.apache.lucene.store.SingleInstanceLockFactory;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.ThreadInterruptedException;
 import org.apache.lucene.util._TestUtil;
@@ -1995,4 +2000,92 @@ public class TestIndexWriter extends LuceneTestCase {
     dir.close();
   }
 
+  public void testIterableThrowsException() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    int iters = atLeast(100);
+    int docCount = 0;
+    int docId = 0;
+    Set<String> liveIds = new HashSet<String>();
+    for (int i = 0; i < iters; i++) {
+      List<Document> docs = new ArrayList<Document>();
+      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+      FieldType idFt = new FieldType(TextField.TYPE_STORED);
+      
+      int numDocs = atLeast(4);
+      
+      for (int j = 0; j < numDocs; j++) {
+        Document doc = new Document();
+        doc.add(newField("id", "" + (docId++), idFt));
+        doc.add(newField("foo", _TestUtil.randomSimpleString(random()), ft));
+        docs.add(doc);
+      }
+      boolean success = false;
+      try {
+        w.addDocuments(new RandomFailingFieldIterable(docs, random()));
+        success = true;
+      } catch (RuntimeException e) {
+        assertEquals("boom", e.getMessage());
+      } finally {
+        if (success) {
+          docCount += docs.size();
+          for (Document indexDocument : docs) {
+            liveIds.add(indexDocument.get("id"));
+          }
+        }
+      }
+    }
+    DirectoryReader reader = w.getReader();
+    assertEquals(docCount, reader.numDocs());
+    List<AtomicReaderContext> leaves = reader.leaves();
+    for (AtomicReaderContext atomicReaderContext : leaves) {
+      AtomicReader ar = atomicReaderContext.reader();
+      Bits liveDocs = ar.getLiveDocs();
+      int maxDoc = ar.maxDoc();
+      for (int i = 0; i < maxDoc; i++) {
+        if (liveDocs == null || liveDocs.get(i)) {
+          assertTrue(liveIds.remove(ar.document(i).get("id")));
+        }
+      }
+    }
+    assertTrue(liveIds.isEmpty());
+    IOUtils.close(reader, w, dir);
+  }
+  
+  private static class RandomFailingFieldIterable implements Iterable<IndexDocument> {
+    private final List<? extends IndexDocument> docList;
+    private final Random random;
+
+    public RandomFailingFieldIterable(List<? extends IndexDocument> docList, Random random) {
+      this.docList = docList;
+      this.random = random;
+    }
+    
+    @Override
+    public Iterator<IndexDocument> iterator() {
+      final Iterator<? extends IndexDocument> docIter = docList.iterator();
+      return new Iterator<IndexDocument>() {
+
+        @Override
+        public boolean hasNext() {
+          return docIter.hasNext();
+        }
+
+        @Override
+        public IndexDocument next() {
+          if (random.nextInt(5) == 0) {
+            throw new RuntimeException("boom");
+          }
+          return docIter.next();
+        }
+        
+        @Override
+        public void remove() {
+          throw new UnsupportedOperationException();
+        }
+      };
+    }
+  }
+
 }