mirror of https://github.com/apache/lucene.git

LUCENE-4629: IndexWriter fails to delete documents if Iterator<IndexDocument> throws an exception

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1421798 13f79535-47bb-0310-9956-ffa450edef68

parent 48b1d3a2aa
commit 5ed3c4654c
CHANGES.txt:

@@ -284,6 +284,10 @@ Bug Fixes
  parts of the shape's boundary erroneously too. So center points aren't
  indexed any more; you should use another spatial field. (David Smiley)

* LUCENE-4629: IndexWriter fails to delete documents if a document block is
  indexed and the Iterator throws an exception. Documents were only rolled
  back if the actual indexing process failed. (Simon Willnauer)

Changes in Runtime Behavior

* LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.
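To make the failure mode concrete, here is a minimal sketch (not part of the commit) of the scenario the entry describes. FailingIterable is a hypothetical stand-in for the RandomFailingFieldIterable test class added further down; writer and docs are assumed to be an open IndexWriter and a block of documents:

import java.util.Iterator;
import java.util.List;

import org.apache.lucene.index.IndexDocument;

// Hypothetical document source that dies partway through a block,
// simulating an Iterator failure in the middle of addDocuments().
class FailingIterable implements Iterable<IndexDocument> {
  private final List<? extends IndexDocument> docs;

  FailingIterable(List<? extends IndexDocument> docs) {
    this.docs = docs;
  }

  @Override
  public Iterator<IndexDocument> iterator() {
    final Iterator<? extends IndexDocument> it = docs.iterator();
    return new Iterator<IndexDocument>() {
      private int served = 0;

      @Override
      public boolean hasNext() {
        return it.hasNext();
      }

      @Override
      public IndexDocument next() {
        if (served++ == 1) {
          // Fail on the second document: at this point one document of
          // the block has already been pulled and indexed.
          throw new RuntimeException("boom");
        }
        return it.next();
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException();
      }
    };
  }
}

// Usage: before this fix the first document survived the failed call,
// leaving a partial block in the index; with the fix every document of
// the block is rolled back (marked deleted).
try {
  writer.addDocuments(new FailingIterable(docs));
} catch (RuntimeException expected) {
  // "boom" -- addDocuments is now all-or-nothing per block
}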
DocumentsWriterPerThread.java:

@@ -296,7 +296,9 @@ class DocumentsWriterPerThread {
      infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
    }
    int docCount = 0;
    boolean allDocsIndexed = false;
    try {

      for(IndexDocument doc : docs) {
        docState.doc = doc;
        docState.docID = numDocsInRAM;
@@ -309,20 +311,7 @@ class DocumentsWriterPerThread {
        } finally {
          if (!success) {
            // An exc is being thrown...

            if (!aborting) {
              // One of the documents hit a non-aborting
              // exception (eg something happened during
              // analysis). We now go and mark any docs
              // from this batch that we had already indexed
              // as deleted:
              int docID = docState.docID;
              final int endDocID = docID - docCount;
              while (docID > endDocID) {
                deleteDocID(docID);
                docID--;
              }

              // Incr here because finishDocument will not
              // be called (because an exc is being thrown):
              numDocsInRAM++;
@@ -343,6 +332,7 @@ class DocumentsWriterPerThread {

          finishDocument(null);
        }
        allDocsIndexed = true;

        // Apply delTerm only after all indexing has
        // succeeded, but apply it only to docs prior to when
@@ -354,6 +344,16 @@ class DocumentsWriterPerThread {
      }

    } finally {
      if (!allDocsIndexed && !aborting) {
        // the iterator threw an exception that is not aborting;
        // go and mark all docs from this block as deleted
        int docID = numDocsInRAM-1;
        final int endDocID = docID - docCount;
        while (docID > endDocID) {
          deleteDocID(docID);
          docID--;
        }
      }
      docState.clear();
    }
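The heart of the fix is the rollback loop in the new outer finally block. The arithmetic: numDocsInRAM counts every document buffered so far, docCount counts how many came from the failed block, so the block occupies docIDs (numDocsInRAM - docCount) through (numDocsInRAM - 1). A self-contained sketch (not the real DocumentsWriterPerThread API; deleteDocID is simulated here) showing exactly which docIDs the loop visits:

import java.util.ArrayList;
import java.util.List;

// Sketch of the rollback arithmetic used above, with concrete numbers:
// 7 docs from earlier blocks plus 3 from the failed block are buffered.
public class RollbackSketch {
  public static void main(String[] args) {
    int numDocsInRAM = 10; // all docs buffered so far
    int docCount = 3;      // docs of the failed block already indexed

    List<Integer> deleted = new ArrayList<Integer>();
    int docID = numDocsInRAM - 1;          // last doc of the failed block: 9
    final int endDocID = docID - docCount; // one below the block start: 6
    while (docID > endDocID) {
      deleted.add(docID); // stands in for deleteDocID(docID)
      docID--;
    }
    System.out.println(deleted); // prints [9, 8, 7]: exactly the block's docs
  }
}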
TestIndexWriter.java:

@@ -24,8 +24,11 @@ import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -56,7 +59,9 @@ import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util._TestUtil;
@@ -1995,4 +2000,92 @@ public class TestIndexWriter extends LuceneTestCase {
    dir.close();
  }

  public void testIterableThrowsException() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    int iters = atLeast(100);
    int docCount = 0;
    int docId = 0;
    Set<String> liveIds = new HashSet<String>();
    for (int i = 0; i < iters; i++) {
      List<Document> docs = new ArrayList<Document>();
      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
      FieldType idFt = new FieldType(TextField.TYPE_STORED);

      int numDocs = atLeast(4);
      for (int j = 0; j < numDocs; j++) {
        Document doc = new Document();
        doc.add(newField("id", "" + (docId++), idFt));
        doc.add(newField("foo", _TestUtil.randomSimpleString(random()), ft));
        docs.add(doc);
      }
      boolean success = false;
      try {
        w.addDocuments(new RandomFailingFieldIterable(docs, random()));
        success = true;
      } catch (RuntimeException e) {
        assertEquals("boom", e.getMessage());
      } finally {
        if (success) {
          docCount += docs.size();
          for (Document indexDocument : docs) {
            liveIds.add(indexDocument.get("id"));
          }
        }
      }
    }
    DirectoryReader reader = w.getReader();
    assertEquals(docCount, reader.numDocs());
    List<AtomicReaderContext> leaves = reader.leaves();
    for (AtomicReaderContext atomicReaderContext : leaves) {
      AtomicReader ar = atomicReaderContext.reader();
      Bits liveDocs = ar.getLiveDocs();
      int maxDoc = ar.maxDoc();
      for (int i = 0; i < maxDoc; i++) {
        // getLiveDocs() returns null when a segment has no deletions
        if (liveDocs == null || liveDocs.get(i)) {
          assertTrue(liveIds.remove(ar.document(i).get("id")));
        }
      }
    }
    assertTrue(liveIds.isEmpty());
    IOUtils.close(reader, w, dir);
  }

  private static class RandomFailingFieldIterable implements Iterable<IndexDocument> {
    private final List<? extends IndexDocument> docList;
    private final Random random;

    public RandomFailingFieldIterable(List<? extends IndexDocument> docList, Random random) {
      this.docList = docList;
      this.random = random;
    }

    @Override
    public Iterator<IndexDocument> iterator() {
      final Iterator<? extends IndexDocument> docIter = docList.iterator();
      return new Iterator<IndexDocument>() {

        @Override
        public boolean hasNext() {
          return docIter.hasNext();
        }

        @Override
        public IndexDocument next() {
          if (random.nextInt(5) == 0) {
            throw new RuntimeException("boom");
          }
          return docIter.next();
        }

        @Override
        public void remove() {
          throw new UnsupportedOperationException();
        }
      };
    }
  }
}
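A note on the test's failure rate: each call to next() throws with probability 1/5, so a block of n documents is fully indexed with probability (4/5)^n, about 41% when numDocs is exactly 4 and less for larger blocks. Across atLeast(100) iterations this exercises both the success path and the rollback path many times over.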