LUCENE-2720: IndexWriter should throw IndexFormatTooOldExc on open, not later during optimize/getReader/close (trunk)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1062325 13f79535-47bb-0310-9956-ffa450edef68
2011-01-23 05:10:48 +00:00 · 2011-01-23 05:10:48 +00:00 · 133e70cad6
parent 22f0fe9718
commit 133e70cad6
12 changed files with 133 additions and 40 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -141,6 +141,9 @@ Changes in Runtime Behavior
  would populate "fake norms" with Similarity.getDefault() for these documents.
  (Robert Muir, Mike Mccandless)
  
+* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather 
+  than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler)
+
 API Changes

 * LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer
--- a/lucene/src/java/org/apache/lucene/index/FieldsReader.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldsReader.java
@ -37,8 +37,10 @@ import java.io.Reader;
 * Class responsible for access to stored document fields.
 * <p/>
 * It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx; files.
+ * 
+ * @lucene.internal
 */
-final class FieldsReader implements Cloneable {
+public final class FieldsReader implements Cloneable {
  private final static int FORMAT_SIZE = 4;

  private final FieldInfos fieldInfos;
@ -75,6 +77,23 @@ final class FieldsReader implements Cloneable {
    return new FieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
  }

+  /** Verifies that the code version which wrote the segment is supported. */
+  public static void checkCodeVersion(Directory dir, String segment) throws IOException {
+    final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_INDEX_EXTENSION);
+    IndexInput idxStream = dir.openInput(indexStreamFN, 1024);
+    
+    try {
+      int format = idxStream.readInt();
+      if (format < FieldsWriter.FORMAT_MINIMUM)
+        throw new IndexFormatTooOldException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
+      if (format > FieldsWriter.FORMAT_CURRENT)
+        throw new IndexFormatTooNewException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
+    } finally {
+      idxStream.close();
+    }
+  
+  }
+  
  // Used only by clone
  private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
                       IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) {
@ -89,11 +108,11 @@ final class FieldsReader implements Cloneable {
    indexStream = (IndexInput) cloneableIndexStream.clone();
  }
  
-  FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
+  public FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
    this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0);
  }

-  FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
+  public FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
    boolean success = false;
    isOriginal = true;
    try {
@ -157,7 +176,7 @@ final class FieldsReader implements Cloneable {
   *
   * @throws IOException
   */
-  final void close() throws IOException {
+  public final void close() throws IOException {
    if (!closed) {
      if (fieldsStream != null) {
        fieldsStream.close();
@ -178,7 +197,7 @@ final class FieldsReader implements Cloneable {
    }
  }

-  final int size() {
+  public final int size() {
    return size;
  }

@ -186,7 +205,7 @@ final class FieldsReader implements Cloneable {
    indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
  }

-  final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+  public final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
    seekIndex(n);
    long position = indexStream.readLong();
    fieldsStream.seek(position);
@ -237,7 +256,7 @@ final class FieldsReader implements Cloneable {
   *  contiguous range of length numDocs starting with
   *  startDocID.  Returns the IndexInput (the fieldStream),
   *  already seeked to the starting point for startDocID.*/
-  final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
+  public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
    seekIndex(startDocID);
    long startOffset = indexStream.readLong();
    long lastOffset = startOffset;
--- a/lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexFormatTooOldException.java
@ -23,6 +23,11 @@ package org.apache.lucene.index;
 */
 public class IndexFormatTooOldException extends CorruptIndexException {

+  public IndexFormatTooOldException(String filename, String version) {
+    super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") +
+        ": " + version + ". This version of Lucene only supports indexes created with release 3.0 and later.");
+  }
+  
  public IndexFormatTooOldException(String filename, int version, int minVersion, int maxVersion) {
    super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") +
        ": " + version + " (needs to be between " + minVersion + " and " + maxVersion +
--- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
@ -605,8 +605,6 @@ public class IndexWriter implements Closeable {
    }
  }
  
-  
-  
  /**
   * Obtain the number of deleted docs for a pooled reader.
   * If the reader isn't being pooled, the segmentInfo's 
@ -715,11 +713,8 @@ public class IndexWriter implements Closeable {

    boolean success = false;

-    // TODO: we should check whether this index is too old,
-    // and throw an IndexFormatTooOldExc up front, here,
-    // instead of later when merge, applyDeletes, getReader
-    // is attempted.  I think to do this we should store the
-    // oldest segment's version in segments_N.
+    // If index is too old, reading the segments will throw
+    // IndexFormatTooOldException.
    segmentInfos = new SegmentInfos(codecs);
    try {
      if (create) {
@ -982,6 +977,7 @@ public class IndexWriter implements Closeable {
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
+  @Override
  public void close() throws CorruptIndexException, IOException {
    close(true);
  }
--- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
@ -20,6 +20,7 @@ package org.apache.lucene.index;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Constants;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
@ -87,6 +88,13 @@ public final class SegmentInfo {

  private Map<String,String> diagnostics;

+  // Tracks the Lucene version this segment was created with, since 3.1. Null
+  // indicates a pre-3.1 segment (3.0 or older); it's used to detect a too old index.
+  // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and 
+  // specific versions afterwards ("3.0", "3.1" etc.).
+  // see Constants.LUCENE_MAIN_VERSION.
+  private String version;
+  
  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
                     boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors) {
    this.name = name;
@ -99,6 +107,7 @@ public final class SegmentInfo {
    this.segmentCodecs = segmentCodecs;
    this.hasVectors = hasVectors;
    delCount = 0;
+    version = Constants.LUCENE_MAIN_VERSION;
  }

  /**
@ -106,6 +115,7 @@ public final class SegmentInfo {
   */
  void reset(SegmentInfo src) {
    clearFiles();
+    version = src.version;
    name = src.name;
    docCount = src.docCount;
    dir = src.dir;
@ -145,6 +155,9 @@ public final class SegmentInfo {
   */
  public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
    this.dir = dir;
+    if (format <= DefaultSegmentInfosWriter.FORMAT_3_1) {
+      version = input.readString();
+    }
    name = input.readString();
    docCount = input.readInt();
    delGen = input.readLong();
@ -293,6 +306,7 @@ public final class SegmentInfo {
      si.normGen = normGen.clone();
    }
    si.hasVectors = hasVectors;
+    si.version = version;
    return si;
  }

@ -433,6 +447,8 @@ public final class SegmentInfo {
  public void write(IndexOutput output)
    throws IOException {
    assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
+    // Write the Lucene version that created this segment, since 3.1
+    output.writeString(version);
    output.writeString(name);
    output.writeInt(docCount);
    output.writeLong(delGen);
@ -574,8 +590,9 @@ public final class SegmentInfo {
  /** Used for debugging.  Format may suddenly change.
   * 
   *  <p>Current format looks like
-   *  <code>_a:c45/4->_1</code>, which means the segment's
-   *  name is <code>_a</code>; it's using compound file
+   *  <code>_a(3.1):c45/4->_1</code>, which means the segment's
+   *  name is <code>_a</code>; it was created with Lucene 3.1 (or
+   *  '?' if it's unknown); it's using compound file
   *  format (would be <code>C</code> if not compound); it
   *  has 45 documents; it has 4 deletions (this part is
   *  left off when there are no deletions); it's using the
@ -585,7 +602,7 @@ public final class SegmentInfo {
  public String toString(Directory dir, int pendingDelCount) {

    StringBuilder s = new StringBuilder();
-    s.append(name).append(':');
+    s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':');

    char cfs = getUseCompoundFile() ? 'c' : 'C';
    s.append(cfs);
@ -633,4 +650,25 @@ public final class SegmentInfo {
  public int hashCode() {
    return dir.hashCode() + name.hashCode();
  }
+
+  /**
+   * Used by DefaultSegmentInfosReader to upgrade a 3.0 segment to record that
+   * its version is "3.0". This method can be removed when we're not required to
+   * support 3x indexes anymore, e.g. in 5.0.
+   * <p>
+   * <b>NOTE:</b> this method is used for internal purposes only - you should
+   * not modify the version of a SegmentInfo, or it may result in unexpected
+   * exceptions thrown when you attempt to open the index.
+   * 
+   * @lucene.internal
+   */
+  public void setVersion(String version) {
+    this.version = version;
+  }
+  
+  /** Returns the version of the code which wrote the segment. */
+  public String getVersion() {
+    return version;
+  }
+  
 }
--- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java
@ -226,6 +226,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
          assert storeDir != null;
        }

+        // nocommit: this can be simplified to always be si.getDocStoreSegment()
        final String storesSegment;
        if (si.getDocStoreOffset() != -1) {
          storesSegment = si.getDocStoreSegment();
--- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosReader.java
@ -19,7 +19,10 @@ package org.apache.lucene.index.codecs;

 import java.io.IOException;

+import org.apache.lucene.index.CompoundFileReader;
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldsReader;
+import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexFormatTooOldException;
 import org.apache.lucene.index.IndexFormatTooNewException;
 import org.apache.lucene.index.SegmentInfo;
@ -55,7 +58,41 @@ public class DefaultSegmentInfosReader extends SegmentInfosReader {
      infos.counter = input.readInt(); // read counter
  
      for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
-        infos.add(new SegmentInfo(directory, format, input, codecs));
+        SegmentInfo si = new SegmentInfo(directory, format, input, codecs);
+        if (si.getVersion() == null) {
+          // Could be a 3.0 - try to open the doc stores - if it fails, it's a
+          // 2.x segment, and an IndexFormatTooOldException will be thrown,
+          // which is what we want.
+          Directory dir = directory;
+          if (si.getDocStoreOffset() != -1) {
+            if (si.getDocStoreIsCompoundFile()) {
+              dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName(
+                  si.getDocStoreSegment(), "",
+                  IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), 1024);
+            }
+          } else if (si.getUseCompoundFile()) {
+            dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName(
+                si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), 1024);
+          }
+
+          try {
+            FieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
+          } finally {
+            // If we opened the directory, close it
+            if (dir != directory) dir.close();
+          }
+          
+          // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next
+          // time the segment is read, its version won't be null and we won't
+          // need to open FieldsReader every time for each such segment.
+          si.setVersion("3.0");
+        } else if (si.getVersion().equals("2.x")) {
+          // If it's a 3x index touched by 3.1+ code, then segments record their
+          // version, whether they are 2.x ones or not. We detect that and throw
+          // appropriate exception.
+          throw new IndexFormatTooOldException(si.name, si.getVersion());
+        }
+        infos.add(si);
      }
      
      infos.userData = input.readStringStringMap();
--- a/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
@ -38,9 +38,12 @@ public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
  /** Each segment records whether it has term vectors */
  public static final int FORMAT_HAS_VECTORS = -10;

+  /** Each segment records the Lucene version that created it. */
+  public static final int FORMAT_3_1 = -11;
+
  /** Each segment records whether its postings are written
   *  in the new flex format */
-  public static final int FORMAT_4_0 = -11;
+  public static final int FORMAT_4_0 = -12;

  /** This must always point to the most recent file format.
   * whenever you add a new format, make it 1 smaller (negative version logic)! */
--- a/lucene/src/java/org/apache/lucene/util/Constants.java
+++ b/lucene/src/java/org/apache/lucene/util/Constants.java
@ -70,6 +70,9 @@ public final class Constants {
    return s.toString();
  }
  
+  // NOTE: we track per-segment version as a String with the "X.Y" format, e.g.
+  // "4.0", "3.1", "3.0". Therefore when we change this constant, we should keep
+  // the format.
  public static final String LUCENE_MAIN_VERSION = ident("4.0");

  public static final String LUCENE_VERSION;
--- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@ -171,15 +171,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {

      try {
        writer = new IndexWriter(dir, newIndexWriterConfig(
-          TEST_VERSION_CURRENT, new MockAnalyzer())
-          .setMergeScheduler(new SerialMergeScheduler()) // no threads!
-        );
-        // TODO: Make IndexWriter fail on open!
-        if (random.nextBoolean()) {
-          writer.optimize();
-        } else {
-          reader = writer.getReader();
-        }
+          TEST_VERSION_CURRENT, new MockAnalyzer()));
        fail("IndexWriter creation should not pass for "+unsupportedNames[i]);
      } catch (IndexFormatTooOldException e) {
        // pass
@ -188,18 +180,14 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
          e.printStackTrace(System.out);
        }
      } finally {
-        if (reader != null) reader.close();
-        reader = null;
+        // we should fail to open IW, and so it should be null when we get here.
+        // However, if the test fails (i.e., IW did not fail on open), we need
+        // to close IW. However, if merges are run, IW may throw
+        // IndexFormatTooOldException, and we don't want to mask the fail()
+        // above, so close without waiting for merges.
        if (writer != null) {
-          try {
-            writer.close();
-          } catch (IndexFormatTooOldException e) {
-            // OK -- since IW gives merge scheduler a chance
-            // to merge at close, it's possible and fine to
-            // hit this exc here
          writer.close(false);
        }
-        }
        writer = null;
      }
      
--- a/lucene/src/test/org/apache/lucene/index/index.31.cfs.zip
+++ b/lucene/src/test/org/apache/lucene/index/index.31.cfs.zip
--- a/lucene/src/test/org/apache/lucene/index/index.31.nocfs.zip
+++ b/lucene/src/test/org/apache/lucene/index/index.31.nocfs.zip