LUCENE-938: fixed certain cases where an unhandled exception could cause IndexWriter to lose buffered deletes

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@556010 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2007-07-13 14:23:47 +00:00
parent 8355ab88b3
commit f55e4057d2
4 changed files with 230 additions and 1 deletion

CHANGES.txt

@@ -31,6 +31,10 @@ Bug fixes
that was thrown after a call of TermPositions.seek().
(Rich Johnson via Michael Busch)
4. LUCENE-938: Fixed cases where an unhandled exception in
IndexWriter's methods could cause deletes to be lost.
(Steven Parkes via Mike McCandless)
New features
1. LUCENE-906: Elision filter for French.

IndexWriter.java

@@ -1331,8 +1331,18 @@ public class IndexWriter {
* must make a matched (try/finally) call to
* commitTransaction() or rollbackTransaction() to finish
* the transaction.
*
* Note that buffered documents and delete terms are not handled
* within the transactions, so they must be flushed before the
* transaction is started.
*/
private void startTransaction() throws IOException {
assert numBufferedDeleteTerms == 0 :
"calling startTransaction with buffered delete terms not supported";
assert docWriter.getNumDocsInRAM() == 0 :
"calling startTransaction with buffered documents not supported";
localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
localAutoCommit = autoCommit;
if (localAutoCommit) {
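For context, a minimal sketch of the contract described in the comment above, as the internal callers (e.g. addIndexes) are expected to follow it; the exact flush call and the merge step are assumptions, not part of this diff:

// Buffered docs and delete terms must be flushed before the
// transaction starts, then every startTransaction() is matched by
// commitTransaction() or rollbackTransaction() in a try/finally.
flush();
startTransaction();
boolean success = false;
try {
  // ... modify segmentInfos (e.g. swap in merged segments) ...
  success = true;
} finally {
  if (success)
    commitTransaction();
  else
    rollbackTransaction();
}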
@@ -1907,6 +1917,9 @@ public class IndexWriter {
SegmentInfos rollback = null;
HashMap saveBufferedDeleteTerms = null;
int saveNumBufferedDeleteTerms = 0;
if (flushDeletes)
rollback = (SegmentInfos) segmentInfos.clone();
@@ -1941,6 +1954,8 @@ public class IndexWriter {
// buffer deletes longer and then flush them to
// multiple flushed segments, when
// autoCommit=false
saveBufferedDeleteTerms = bufferedDeleteTerms;
saveNumBufferedDeleteTerms = numBufferedDeleteTerms;
applyDeletes(flushDocs);
doAfterFlush();
}
@@ -1955,6 +1970,12 @@ public class IndexWriter {
// SegmentInfo instances:
segmentInfos.clear();
segmentInfos.addAll(rollback);
if (saveBufferedDeleteTerms != null) {
numBufferedDeleteTerms = saveNumBufferedDeleteTerms;
bufferedDeleteTerms = saveBufferedDeleteTerms;
}
} else {
// Remove segment we added, if any:
if (newSegment != null &&
@@ -2330,7 +2351,13 @@ public class IndexWriter {
}
// Clean up bufferedDeleteTerms.
bufferedDeleteTerms.clear();
// Rollbacks of buffered deletes are based on restoring the old
// map, so don't modify this one. This is rare enough that the gc
// overhead is almost certainly lower than the alternative, which
// would be to clone the map to support rollback.
bufferedDeleteTerms = new HashMap();
numBufferedDeleteTerms = 0;
}
}
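Taken together, the flush hunks above follow a save/restore pattern around applyDeletes(). A condensed sketch (not the literal method body; the success flag and try/finally framing are assumed from the surrounding context):

SegmentInfos rollback = null;
HashMap saveBufferedDeleteTerms = null;
int saveNumBufferedDeleteTerms = 0;
if (flushDeletes)
  rollback = (SegmentInfos) segmentInfos.clone();
boolean success = false;
try {
  if (flushDeletes) {
    // Remember the buffered deletes before applyDeletes() swaps in a
    // fresh map; if writing the new segments file fails below, the
    // old map is put back and no deletes are lost.
    saveBufferedDeleteTerms = bufferedDeleteTerms;
    saveNumBufferedDeleteTerms = numBufferedDeleteTerms;
    applyDeletes(flushDocs);
    doAfterFlush();
  }
  // ... write the new segments file ...
  success = true;
} finally {
  if (!success && flushDeletes) {
    // Restore the pre-flush view of the index and its buffered deletes.
    segmentInfos.clear();
    segmentInfos.addAll(rollback);
    if (saveBufferedDeleteTerms != null) {
      bufferedDeleteTerms = saveBufferedDeleteTerms;
      numBufferedDeleteTerms = saveNumBufferedDeleteTerms;
    }
  }
}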

TestIndexWriterDelete.java

@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
import java.lang.StackTraceElement;
import junit.framework.TestCase;
@@ -534,6 +535,146 @@ public class TestIndexWriterDelete extends TestCase {
}
}
// This test verifies that buffered deletes are not lost due to I/O
// errors that occur after the buffered deletes have been flushed but
// before the segmentInfos have been successfully written.
public void testErrorAfterApplyDeletes() throws IOException {
MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
boolean sawMaybe = false;
boolean failed = false;
public MockRAMDirectory.Failure reset() {
sawMaybe = false;
failed = false;
return this;
}
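// Fail exactly once: throw on the first write that happens after
// applyDeletes has been seen on the call stack (see below).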
public void eval(MockRAMDirectory dir) throws IOException {
if (sawMaybe && !failed) {
failed = true;
throw new IOException("fail after applyDeletes");
}
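// Record that a write occurred while applyDeletes was on the stack;
// the next write after this one will hit the throw above.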
if (!failed) {
StackTraceElement[] trace = new Exception().getStackTrace();
for (int i = 0; i < trace.length; i++) {
if ("applyDeletes".equals(trace[i].getMethodName())) {
sawMaybe = true;
break;
}
}
}
}
};
// create a couple of documents
String[] keywords = { "1", "2" };
String[] unindexed = { "Netherlands", "Italy" };
String[] unstored = { "Amsterdam has lots of bridges",
"Venice has lots of canals" };
String[] text = { "Amsterdam", "Venice" };
for(int pass=0;pass<2;pass++) {
boolean autoCommit = (0==pass);
Directory ramDir = new RAMDirectory();
MockRAMDirectory dir = new MockRAMDirectory(ramDir);
IndexWriter modifier = new IndexWriter(dir, autoCommit,
new WhitespaceAnalyzer(), true);
modifier.setUseCompoundFile(true);
modifier.setMaxBufferedDeleteTerms(2);
dir.failOn(failure.reset());
for (int i = 0; i < keywords.length; i++) {
Document doc = new Document();
doc.add(new Field("id", keywords[i], Field.Store.YES,
Field.Index.UN_TOKENIZED));
doc.add(new Field("country", unindexed[i], Field.Store.YES,
Field.Index.NO));
doc.add(new Field("contents", unstored[i], Field.Store.NO,
Field.Index.TOKENIZED));
doc.add(new Field("city", text[i], Field.Store.YES,
Field.Index.TOKENIZED));
modifier.addDocument(doc);
}
// flush (and commit if ac)
modifier.optimize();
// commit if !ac
if (!autoCommit) {
modifier.close();
}
// one of the two docs hits
Term term = new Term("city", "Amsterdam");
int hitCount = getHitCount(dir, term);
assertEquals(1, hitCount);
// open the writer again (closed above)
if (!autoCommit) {
modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
modifier.setUseCompoundFile(true);
}
// delete the doc
// max buf del terms is two, so this is buffered
modifier.deleteDocuments(term);
// add a doc (needed for the !ac case; see below)
// doc remains buffered
Document doc = new Document();
modifier.addDocument(doc);
// flush the changes, the buffered deletes, and the new doc.
// The failure object will fail on the first write after the del
// file gets created when processing the buffered delete.
// In the ac case, this will be when writing the new segments
// files, so we really don't need the new doc, but it's harmless.
// In the !ac case, a new segments file won't be created, but
// creation of the cfs file happens next, so we need the doc
// (to test that we don't lose deletes if we fail while creating
// the cfs file).
boolean failed = false;
try {
modifier.flush();
} catch (IOException ioe) {
failed = true;
}
assertTrue(failed);
// The flush above failed, so we need to retry it (which will
// succeed, because the failure is a one-shot)
if (!autoCommit) {
modifier.close();
} else {
modifier.flush();
}
hitCount = getHitCount(dir, term);
// If we haven't lost the delete, the hit count will be zero
assertEquals(0, hitCount);
if (autoCommit) {
modifier.close();
}
dir.close();
}
}
private String arrayToString(String[] l) {
String s = "";
for (int i = 0; i < l.length; i++) {

MockRAMDirectory.java

@@ -24,6 +24,7 @@ import java.util.Iterator;
import java.util.Random;
import java.util.Map;
import java.util.HashMap;
import java.util.ArrayList;
/**
* This is a subclass of RAMDirectory that adds methods
@@ -116,6 +117,7 @@ public class MockRAMDirectory extends RAMDirectory {
}
void maybeThrowIOException() throws IOException {
maybeThrowDeterministicException();
if (randomIOExceptionRate > 0.0) {
int number = Math.abs(randomState.nextInt() % 1000);
if (number < randomIOExceptionRate*1000) {
@@ -213,4 +215,59 @@ public class MockRAMDirectory extends RAMDirectory {
}
}
}
/**
* Objects that represent fail-able conditions. Objects of a derived
* class are created and registered with the mock directory. After
* registration, each object will be invoked on the first write to
* each new file, giving the object a chance to throw an IOException.
*/
public static class Failure {
/**
* eval is called on the first write of every new file.
*/
public void eval(MockRAMDirectory dir) throws IOException { }
/**
* reset should set the state of the failure to its default
* (freshly constructed) state. Reset is convenient for tests
* that want to create one failure object and then reuse it in
* multiple cases. Because Failure subclasses are often anonymous
* classes, resetting their state any other way would be difficult.
*
* A typical example of use is
* Failure failure = new Failure() { ... };
* ...
* mock.failOn(failure.reset())
*/
public Failure reset() { return this; }
}
ArrayList failures;
/**
* add a Failure object to the list of objects to be evaluated
* at every potential failure point
*/
public void failOn(Failure fail) {
if (failures == null) {
failures = new ArrayList();
}
failures.add(fail);
}
/**
* Iterate through the failures list, giving each object a
* chance to throw an IOException.
*/
void maybeThrowDeterministicException() throws IOException {
if (failures != null) {
for(int i = 0; i < failures.size(); i++) {
((Failure)failures.get(i)).eval(this);
}
}
}
}
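A hypothetical usage sketch of the failure hook (not part of this commit; the counter and exception message below are purely illustrative):

// Register a Failure that trips on the first write to the third new
// file created in the directory.
MockRAMDirectory dir = new MockRAMDirectory(new RAMDirectory());
dir.failOn(new MockRAMDirectory.Failure() {
  int newFileCount = 0;
  public void eval(MockRAMDirectory d) throws IOException {
    if (++newFileCount == 3)
      throw new IOException("simulated failure on third new file");
  }
  public MockRAMDirectory.Failure reset() {
    newFileCount = 0;
    return this;
  }
});
// ... exercise IndexWriter against dir, catch the expected
// IOException, and assert that the index state survived.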