LUCENE-4629: IndexWriter fails to delete documents if Iterator<IndexDocument> throws an exception

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1421798 13f79535-47bb-0310-9956-ffa450edef68

commit 5ed3c4654c (parent 48b1d3a2aa)
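The bug in one sentence: IndexWriter.addDocuments pulls the document block from the caller's Iterator, and the rollback that marks a partially indexed block as deleted only ran when indexing a document failed, not when the Iterator itself threw. A minimal sketch of the failure mode (hypothetical harness, not part of the patch; assume 'writer' is an open IndexWriter and 'docs' is a List<Document> holding three documents):

    // Illustration only: an Iterable whose iterator fails after serving two docs.
    Iterable<Document> failingBlock = new Iterable<Document>() {
      @Override
      public Iterator<Document> iterator() {
        final Iterator<Document> it = docs.iterator();
        return new Iterator<Document>() {
          int served = 0;
          @Override public boolean hasNext() { return it.hasNext(); }
          @Override public Document next() {
            if (served++ == 2) {
              throw new RuntimeException("boom"); // fails after two docs were already indexed
            }
            return it.next();
          }
          @Override public void remove() { throw new UnsupportedOperationException(); }
        };
      }
    };
    try {
      writer.addDocuments(failingBlock); // block adds are meant to be all-or-nothing
    } catch (RuntimeException e) {
      // Before this commit the two documents already consumed from the iterator
      // stayed live in the index; after it they are rolled back (marked deleted).
    }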
CHANGES.txt
@@ -284,6 +284,10 @@ Bug Fixes
   parts of the shape's boundary erroneously too. So center points aren't
   indexed any more; you should use another spatial field. (David Smiley)
 
+* LUCENE-4629: IndexWriter misses to delete documents if a document block is
+  indexed and the Iterator throws an exception. Documents were only rolled back
+  if the actual indexing process failed. (Simon Willnauer)
+
 Changes in Runtime Behavior
 
 * LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.
DocumentsWriterPerThread.java
@@ -296,7 +296,9 @@ class DocumentsWriterPerThread {
       infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
     }
     int docCount = 0;
+    boolean allDocsIndexed = false;
     try {
+
       for(IndexDocument doc : docs) {
         docState.doc = doc;
         docState.docID = numDocsInRAM;
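The new allDocsIndexed flag is the usual success-flag-in-finally pattern: it is set only as the last statement of the try body (see the later hunks), so the finally block can tell "iterator exhausted normally" apart from "exception in flight" without catching and rethrowing. A generic sketch of the pattern (hypothetical doWork/undoPartialWork names, illustration only):

    boolean completed = false;
    try {
      doWork();           // may throw anywhere, including from inside an Iterator
      completed = true;   // reached only if no exception escaped
    } finally {
      if (!completed) {
        undoPartialWork(); // runs on any failure, without swallowing the exception
      }
    }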
@@ -309,20 +311,7 @@ class DocumentsWriterPerThread {
         } finally {
           if (!success) {
             // An exc is being thrown...
-
             if (!aborting) {
-              // One of the documents hit a non-aborting
-              // exception (eg something happened during
-              // analysis). We now go and mark any docs
-              // from this batch that we had already indexed
-              // as deleted:
-              int docID = docState.docID;
-              final int endDocID = docID - docCount;
-              while (docID > endDocID) {
-                deleteDocID(docID);
-                docID--;
-              }
-
               // Incr here because finishDocument will not
               // be called (because an exc is being thrown):
               numDocsInRAM++;
@@ -343,6 +332,7 @@ class DocumentsWriterPerThread {
 
         finishDocument(null);
       }
+      allDocsIndexed = true;
 
       // Apply delTerm only after all indexing has
       // succeeded, but apply it only to docs prior to when
@@ -354,6 +344,16 @@ class DocumentsWriterPerThread {
       }
 
     } finally {
+      if (!allDocsIndexed && !aborting) {
+        // the iterator threw an exception that is not aborting
+        // go and mark all docs from this block as deleted
+        int docID = numDocsInRAM-1;
+        final int endDocID = docID - docCount;
+        while (docID > endDocID) {
+          deleteDocID(docID);
+          docID--;
+        }
+      }
       docState.clear();
     }
 
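Note how the rollback moved from the per-document finally block into the outer one: allDocsIndexed is set only after the for loop over the iterator completes, so an exception thrown by the iterator between documents now also triggers the delete loop. A standalone sketch of the docID arithmetic (plain Java, values invented for the example):

    // Suppose 7 documents already lived in the buffer and the failed block had
    // indexed 3 more before the iterator threw: numDocsInRAM == 10, docCount == 3.
    int numDocsInRAM = 10;
    int docCount = 3;
    int docID = numDocsInRAM - 1;          // last doc of the failed block: 9
    final int endDocID = docID - docCount; // one below the block's first doc: 6
    while (docID > endDocID) {
      System.out.println("deleteDocID(" + docID + ")"); // stands in for deleteDocID(docID)
      docID--;
    }
    // Prints deleteDocID(9), deleteDocID(8), deleteDocID(7): exactly the block's documents.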
TestIndexWriter.java
@@ -24,8 +24,11 @@ import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Random;
+import java.util.Set;
 
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -56,7 +59,9 @@ import org.apache.lucene.store.NoLockFactory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.SimpleFSLockFactory;
 import org.apache.lucene.store.SingleInstanceLockFactory;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.ThreadInterruptedException;
 import org.apache.lucene.util._TestUtil;
@@ -1995,4 +2000,92 @@ public class TestIndexWriter extends LuceneTestCase {
     dir.close();
   }
 
+  public void testIterableThrowsException() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    int iters = atLeast(100);
+    int docCount = 0;
+    int docId = 0;
+    Set<String> liveIds = new HashSet<String>();
+    for (int i = 0; i < iters; i++) {
+      List<Document> docs = new ArrayList<Document>();
+      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+      FieldType idFt = new FieldType(TextField.TYPE_STORED);
+
+      int numDocs = atLeast(4);
+      for (int j = 0; j < numDocs; j++) {
+        Document doc = new Document();
+        doc.add(newField("id", "" + (docId++), idFt));
+        doc.add(newField("foo", _TestUtil.randomSimpleString(random()), ft));
+        docs.add(doc);
+      }
+      boolean success = false;
+      try {
+        w.addDocuments(new RandomFailingFieldIterable(docs, random()));
+        success = true;
+      } catch (RuntimeException e) {
+        assertEquals("boom", e.getMessage());
+      } finally {
+        if (success) {
+          docCount += docs.size();
+          for (Document indexDocument : docs) {
+            liveIds.add(indexDocument.get("id"));
+          }
+        }
+      }
+    }
+    DirectoryReader reader = w.getReader();
+    assertEquals(docCount, reader.numDocs());
+    List<AtomicReaderContext> leaves = reader.leaves();
+    for (AtomicReaderContext atomicReaderContext : leaves) {
+      AtomicReader ar = atomicReaderContext.reader();
+      Bits liveDocs = ar.getLiveDocs();
+      int maxDoc = ar.maxDoc();
+      for (int i = 0; i < maxDoc; i++) {
+        if (liveDocs == null || liveDocs.get(i)) { // liveDocs is null if the segment has no deletions
+          assertTrue(liveIds.remove(ar.document(i).get("id")));
+        }
+      }
+    }
+    assertTrue(liveIds.isEmpty());
+    IOUtils.close(reader, w, dir);
+  }
+
+  private static class RandomFailingFieldIterable implements Iterable<IndexDocument> {
+    private final List<? extends IndexDocument> docList;
+    private final Random random;
+
+    public RandomFailingFieldIterable(List<? extends IndexDocument> docList, Random random) {
+      this.docList = docList;
+      this.random = random;
+    }
+
+    @Override
+    public Iterator<IndexDocument> iterator() {
+      final Iterator<? extends IndexDocument> docIter = docList.iterator();
+      return new Iterator<IndexDocument>() {
+
+        @Override
+        public boolean hasNext() {
+          return docIter.hasNext();
+        }
+
+        @Override
+        public IndexDocument next() {
+          if (random.nextInt(5) == 0) {
+            throw new RuntimeException("boom");
+          }
+          return docIter.next();
+        }
+
+        @Override
+        public void remove() { throw new UnsupportedOperationException(); }
+
+      };
+    }
+
+  }
+
 }
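A note on the test's failure rate: RandomFailingFieldIterable throws from next() with probability 1/5 on each call, so a block of n documents is added intact with probability (4/5)^n, roughly 41% for the minimum block size of 4. Over at least 100 iterations the test therefore exercises both the success path and the iterator-failure rollback many times, and the final live-docs sweep verifies that exactly the ids of fully added blocks survive.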