LUCENE-4629: IndexWriter fails to delete documents if Iterator<IndexDocument> throws an exception

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1421798 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2012-12-14 10:54:47 +00:00
parent 48b1d3a2aa
commit 5ed3c4654c
3 changed files with 110 additions and 13 deletions
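
The failure mode in brief: IndexWriter.addDocuments(Iterable) treats a block of documents as atomic, but the rollback that marks partially indexed documents as deleted only ran when indexing a document itself failed. If the caller-supplied Iterator threw between documents, the documents already consumed stayed live. A minimal sketch of the fixed semantics, with hypothetical `writer` and `docs` (the test added in this commit exercises the same pattern):

    // A block whose iterator fails partway through; the documents it already
    // returned have been indexed by the time the exception surfaces.
    Iterable<IndexDocument> block = new Iterable<IndexDocument>() {
      @Override
      public Iterator<IndexDocument> iterator() {
        final Iterator<? extends IndexDocument> it = docs.iterator();
        return new Iterator<IndexDocument>() {
          int returned = 0;
          @Override public boolean hasNext() { return it.hasNext(); }
          @Override public IndexDocument next() {
            if (returned++ == 2) { // fail after two documents were handed out
              throw new RuntimeException("boom");
            }
            return it.next();
          }
          @Override public void remove() { throw new UnsupportedOperationException(); }
        };
      }
    };
    try {
      writer.addDocuments(block);
    } catch (RuntimeException e) {
      // With this fix, the two already-indexed documents are marked deleted and
      // the block is all-or-nothing; before it, they remained live in the index.
    }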

lucene/CHANGES.txt

@@ -284,6 +284,10 @@ Bug Fixes
   parts of the shape's boundary erroneously too. So center points aren't
   indexed any more; you should use another spatial field. (David Smiley)
 
+* LUCENE-4629: IndexWriter fails to delete documents if a document block is
+  indexed and the Iterator throws an exception. Documents were only rolled
+  back if the actual indexing process failed. (Simon Willnauer)
+
 Changes in Runtime Behavior
 
 * LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.

lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java

@@ -296,7 +296,9 @@ class DocumentsWriterPerThread {
       infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
     }
     int docCount = 0;
+    boolean allDocsIndexed = false;
     try {
       for(IndexDocument doc : docs) {
         docState.doc = doc;
         docState.docID = numDocsInRAM;
@@ -309,20 +311,7 @@ class DocumentsWriterPerThread {
         } finally {
           if (!success) {
             // An exc is being thrown...
             if (!aborting) {
-              // One of the documents hit a non-aborting
-              // exception (eg something happened during
-              // analysis). We now go and mark any docs
-              // from this batch that we had already indexed
-              // as deleted:
-              int docID = docState.docID;
-              final int endDocID = docID - docCount;
-              while (docID > endDocID) {
-                deleteDocID(docID);
-                docID--;
-              }
-
               // Incr here because finishDocument will not
               // be called (because an exc is being thrown):
               numDocsInRAM++;
@@ -343,6 +332,7 @@ class DocumentsWriterPerThread {
         finishDocument(null);
       }
+      allDocsIndexed = true;
 
       // Apply delTerm only after all indexing has
       // succeeded, but apply it only to docs prior to when
@@ -354,6 +344,16 @@ class DocumentsWriterPerThread {
       }
     } finally {
+      if (!allDocsIndexed && !aborting) {
+        // the iterator threw an exception that is not aborting
+        // go and mark all docs from this block as deleted
+        int docID = numDocsInRAM-1;
+        final int endDocID = docID - docCount;
+        while (docID > endDocID) {
+          deleteDocID(docID);
+          docID--;
+        }
+      }
       docState.clear();
     }
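
Why the rollback moved: the old loop lived in the per-document finally and was guarded by !success, so it ran only when indexing a particular document failed; an exception thrown by the iterator itself, between documents, bypassed it and left the partial block live. The outer finally with the allDocsIndexed flag now covers both cases. A standalone sketch of the deletion-range arithmetic, with made-up numbers:

    // Suppose the in-RAM segment held 10 documents when the block started and
    // the iterator threw after 3 documents of the block had been indexed.
    int numDocsInRAM = 13;                 // 10 pre-existing + 3 from the failed block
    int docCount = 3;                      // documents indexed before the failure
    int docID = numDocsInRAM - 1;          // 12: the newest document of the block
    final int endDocID = docID - docCount; // 9: the last document before the block
    while (docID > endDocID) {
      System.out.println("deleteDocID(" + docID + ")"); // marks 12, 11, 10 deleted
      docID--;
    }
    // Documents 0..9 are untouched; only the failed block is rolled back.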

lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java

@@ -24,8 +24,11 @@ import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Random;
+import java.util.Set;
 
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -56,7 +59,9 @@ import org.apache.lucene.store.NoLockFactory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.SimpleFSLockFactory;
 import org.apache.lucene.store.SingleInstanceLockFactory;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.ThreadInterruptedException;
 import org.apache.lucene.util._TestUtil;
@@ -1995,4 +2000,92 @@ public class TestIndexWriter extends LuceneTestCase {
     dir.close();
   }
 
+  public void testIterableThrowsException() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    int iters = atLeast(100);
+    int docCount = 0;
+    int docId = 0;
+    Set<String> liveIds = new HashSet<String>();
+    for (int i = 0; i < iters; i++) {
+      List<Document> docs = new ArrayList<Document>();
+      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+      FieldType idFt = new FieldType(TextField.TYPE_STORED);
+      
+      int numDocs = atLeast(4);
+      
+      for (int j = 0; j < numDocs; j++) {
+        Document doc = new Document();
+        doc.add(newField("id", "" + (docId++), idFt));
+        doc.add(newField("foo", _TestUtil.randomSimpleString(random()), ft));
+        docs.add(doc);
+      }
+      boolean success = false;
+      try {
+        w.addDocuments(new RandomFailingFieldIterable(docs, random()));
+        success = true;
+      } catch (RuntimeException e) {
+        assertEquals("boom", e.getMessage());
+      } finally {
+        if (success) {
+          docCount += docs.size();
+          for (Document indexDocument : docs) {
+            liveIds.add(indexDocument.get("id"));
+          }
+        }
+      }
+    }
+    DirectoryReader reader = w.getReader();
+    assertEquals(docCount, reader.numDocs());
+    List<AtomicReaderContext> leaves = reader.leaves();
+    for (AtomicReaderContext atomicReaderContext : leaves) {
+      AtomicReader ar = atomicReaderContext.reader();
+      Bits liveDocs = ar.getLiveDocs();
+      int maxDoc = ar.maxDoc();
+      for (int i = 0; i < maxDoc; i++) {
+        if (liveDocs == null || liveDocs.get(i)) {
+          assertTrue(liveIds.remove(ar.document(i).get("id")));
+        }
+      }
+    }
+    assertTrue(liveIds.isEmpty());
+    IOUtils.close(reader, w, dir);
+  }
+  
+  private static class RandomFailingFieldIterable implements Iterable<IndexDocument> {
+    private final List<? extends IndexDocument> docList;
+    private final Random random;
+
+    public RandomFailingFieldIterable(List<? extends IndexDocument> docList, Random random) {
+      this.docList = docList;
+      this.random = random;
+    }
+    
+    @Override
+    public Iterator<IndexDocument> iterator() {
+      final Iterator<? extends IndexDocument> docIter = docList.iterator();
+      return new Iterator<IndexDocument>() {
+
+        @Override
+        public boolean hasNext() {
+          return docIter.hasNext();
+        }
+
+        @Override
+        public IndexDocument next() {
+          if (random.nextInt(5) == 0) {
+            throw new RuntimeException("boom");
+          }
+          return docIter.next();
+        }
+        
+        @Override
+        public void remove() {
+          throw new UnsupportedOperationException();
+        }
+      };
+    }
+  }
+
 }