LUCENE-4629: IndexWriter fails to delete documents if Iterator<IndexDocument> throws an exception

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1421798 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2012-12-14 10:54:47 +00:00
parent 48b1d3a2aa
commit 5ed3c4654c
3 changed files with 110 additions and 13 deletions

lucene/CHANGES.txt

@@ -284,6 +284,10 @@ Bug Fixes
   parts of the shape's boundary erroneously too.  So center points aren't
   indexed any more; you should use another spatial field.  (David Smiley)

* LUCENE-4629: IndexWriter fails to delete documents if a document block is
  indexed and the Iterator throws an exception.  Documents were only rolled
  back if the actual indexing process failed.  (Simon Willnauer)

Changes in Runtime Behavior

* LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.
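
For context, a minimal sketch of the failure mode this entry describes (illustration only, not part of the commit; the class name and exception message are made up): an Iterable passed to IndexWriter.addDocuments() whose iterator throws *between* documents. At that moment no individual document is being indexed, so the old per-document rollback in DocumentsWriterPerThread never ran, and the documents already consumed from the block stayed live.

    // Illustration only (not part of this commit): an Iterable whose
    // iterator fails between documents, as seen by addDocuments().
    import java.util.Iterator;
    import java.util.List;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexDocument;

    class FailAfterFirstIterable implements Iterable<IndexDocument> {
      private final List<Document> docs;

      FailAfterFirstIterable(List<Document> docs) {
        this.docs = docs;
      }

      @Override
      public Iterator<IndexDocument> iterator() {
        final Iterator<Document> it = docs.iterator();
        return new Iterator<IndexDocument>() {
          private int served = 0;

          @Override
          public boolean hasNext() {
            return it.hasNext();
          }

          @Override
          public IndexDocument next() {
            if (served++ == 1) {
              // Fails after the first document was already indexed; no
              // document is mid-indexing here, which is exactly the case
              // the old rollback did not cover.
              throw new RuntimeException("iterator boom");
            }
            return it.next();
          }

          @Override
          public void remove() {
            throw new UnsupportedOperationException();
          }
        };
      }
    }

Calling w.addDocuments(new FailAfterFirstIterable(docs)) throws after the first document of the block was added; with this fix that document is marked deleted during rollback instead of becoming visible.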

lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java

@@ -296,7 +296,9 @@ class DocumentsWriterPerThread {
      infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
    }
    int docCount = 0;
    boolean allDocsIndexed = false;
    try {
      for(IndexDocument doc : docs) {
        docState.doc = doc;
        docState.docID = numDocsInRAM;
@@ -309,20 +311,7 @@ class DocumentsWriterPerThread {
        } finally {
          if (!success) {
            // An exc is being thrown...
            if (!aborting) {
              // One of the documents hit a non-aborting
              // exception (eg something happened during
              // analysis).  We now go and mark any docs
              // from this batch that we had already indexed
              // as deleted:
              int docID = docState.docID;
              final int endDocID = docID - docCount;
              while (docID > endDocID) {
                deleteDocID(docID);
                docID--;
              }
            }
            // Incr here because finishDocument will not
            // be called (because an exc is being thrown):
            numDocsInRAM++;
@@ -343,6 +332,7 @@ class DocumentsWriterPerThread {
        finishDocument(null);
      }
      allDocsIndexed = true;

      // Apply delTerm only after all indexing has
      // succeeded, but apply it only to docs prior to when
@@ -354,6 +344,16 @@ class DocumentsWriterPerThread {
      }
    } finally {
      if (!allDocsIndexed && !aborting) {
        // the iterator threw an exception that is not aborting
        // go and mark all docs from this block as deleted
        int docID = numDocsInRAM-1;
        final int endDocID = docID - docCount;
        while (docID > endDocID) {
          deleteDocID(docID);
          docID--;
        }
      }
      docState.clear();
    }
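
The arithmetic of this new rollback loop is easy to check in isolation. A standalone sketch (the class, the main method, and the println stub are local to this illustration; inside DocumentsWriterPerThread, deleteDocID() buffers the delete): with numDocsInRAM = 10 after three documents of the failed block were indexed (docCount = 3), the loop marks docIDs 9, 8 and 7 as deleted.

    // Standalone illustration of the rollback loop's arithmetic.
    class RollbackArithmetic {
      static void rollbackBlock(int numDocsInRAM, int docCount) {
        int docID = numDocsInRAM - 1;           // highest docID used by the block
        final int endDocID = docID - docCount;  // last docID *before* the block
        while (docID > endDocID) {
          deleteDocID(docID);
          docID--;
        }
      }

      static void deleteDocID(int docID) {
        // Stub: the real method buffers the docID as deleted.
        System.out.println("delete docID " + docID);
      }

      public static void main(String[] args) {
        rollbackBlock(10, 3);  // prints: delete docID 9, 8, 7
      }
    }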

lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java

@@ -24,8 +24,11 @@ import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -56,7 +59,9 @@ import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util._TestUtil;
@@ -1995,4 +2000,92 @@ public class TestIndexWriter extends LuceneTestCase {
    dir.close();
  }

  public void testIterableThrowsException() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    int iters = atLeast(100);
    int docCount = 0;
    int docId = 0;
    Set<String> liveIds = new HashSet<String>();
    for (int i = 0; i < iters; i++) {
      List<Document> docs = new ArrayList<Document>();
      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
      FieldType idFt = new FieldType(TextField.TYPE_STORED);
      int numDocs = atLeast(4);
      for (int j = 0; j < numDocs; j++) {
        Document doc = new Document();
        doc.add(newField("id", "" + (docId++), idFt));
        doc.add(newField("foo", _TestUtil.randomSimpleString(random()), ft));
        docs.add(doc);
      }
      boolean success = false;
      try {
        w.addDocuments(new RandomFailingFieldIterable(docs, random()));
        success = true;
      } catch (RuntimeException e) {
        assertEquals("boom", e.getMessage());
      } finally {
        if (success) {
          docCount += docs.size();
          for (Document indexDocument : docs) {
            liveIds.add(indexDocument.get("id"));
          }
        }
      }
    }
    DirectoryReader reader = w.getReader();
    assertEquals(docCount, reader.numDocs());
    List<AtomicReaderContext> leaves = reader.leaves();
    for (AtomicReaderContext atomicReaderContext : leaves) {
      AtomicReader ar = atomicReaderContext.reader();
      Bits liveDocs = ar.getLiveDocs();
      int maxDoc = ar.maxDoc();
      for (int i = 0; i < maxDoc; i++) {
        // getLiveDocs() returns null for a segment without deletions
        if (liveDocs == null || liveDocs.get(i)) {
          assertTrue(liveIds.remove(ar.document(i).get("id")));
        }
      }
    }
    assertTrue(liveIds.isEmpty());
    IOUtils.close(reader, w, dir);
  }

  private static class RandomFailingFieldIterable implements Iterable<IndexDocument> {
    private final List<? extends IndexDocument> docList;
    private final Random random;

    public RandomFailingFieldIterable(List<? extends IndexDocument> docList, Random random) {
      this.docList = docList;
      this.random = random;
    }

    @Override
    public Iterator<IndexDocument> iterator() {
      final Iterator<? extends IndexDocument> docIter = docList.iterator();
      return new Iterator<IndexDocument>() {

        @Override
        public boolean hasNext() {
          return docIter.hasNext();
        }

        @Override
        public IndexDocument next() {
          if (random.nextInt(5) == 0) {
            throw new RuntimeException("boom");
          }
          return docIter.next();
        }

        @Override
        public void remove() {
          throw new UnsupportedOperationException();
        }
      };
    }
  }
}