LUCENE-2720: IndexWriter should throw IndexFormatTooOldExc on open, not later during optimize/getReader/close (trunk)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1062325 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2011-01-23 05:10:48 +00:00
parent 22f0fe9718
commit 133e70cad6
12 changed files with 133 additions and 40 deletions

View File

@ -141,6 +141,9 @@ Changes in Runtime Behavior
would populate "fake norms" with Similarity.getDefault() for these documents. would populate "fake norms" with Similarity.getDefault() for these documents.
(Robert Muir, Mike Mccandless) (Robert Muir, Mike Mccandless)
* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler)
API Changes API Changes
* LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer * LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer

View File

@ -37,8 +37,10 @@ import java.io.Reader;
* Class responsible for access to stored document fields. * Class responsible for access to stored document fields.
* <p/> * <p/>
* It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx; files. * It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx; files.
*
* @lucene.internal
*/ */
final class FieldsReader implements Cloneable { public final class FieldsReader implements Cloneable {
private final static int FORMAT_SIZE = 4; private final static int FORMAT_SIZE = 4;
private final FieldInfos fieldInfos; private final FieldInfos fieldInfos;
@ -75,6 +77,23 @@ final class FieldsReader implements Cloneable {
return new FieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, cloneableFieldsStream, cloneableIndexStream); return new FieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
} }
/** Verifies that the code version which wrote the segment is supported. */
public static void checkCodeVersion(Directory dir, String segment) throws IOException {
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_INDEX_EXTENSION);
IndexInput idxStream = dir.openInput(indexStreamFN, 1024);
try {
int format = idxStream.readInt();
if (format < FieldsWriter.FORMAT_MINIMUM)
throw new IndexFormatTooOldException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
if (format > FieldsWriter.FORMAT_CURRENT)
throw new IndexFormatTooNewException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
} finally {
idxStream.close();
}
}
// Used only by clone // Used only by clone
private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset, private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) { IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) {
@ -89,11 +108,11 @@ final class FieldsReader implements Cloneable {
indexStream = (IndexInput) cloneableIndexStream.clone(); indexStream = (IndexInput) cloneableIndexStream.clone();
} }
FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { public FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0); this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0);
} }
FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException { public FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
boolean success = false; boolean success = false;
isOriginal = true; isOriginal = true;
try { try {
@ -157,7 +176,7 @@ final class FieldsReader implements Cloneable {
* *
* @throws IOException * @throws IOException
*/ */
final void close() throws IOException { public final void close() throws IOException {
if (!closed) { if (!closed) {
if (fieldsStream != null) { if (fieldsStream != null) {
fieldsStream.close(); fieldsStream.close();
@ -178,7 +197,7 @@ final class FieldsReader implements Cloneable {
} }
} }
final int size() { public final int size() {
return size; return size;
} }
@ -186,7 +205,7 @@ final class FieldsReader implements Cloneable {
indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L); indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
} }
final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { public final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
seekIndex(n); seekIndex(n);
long position = indexStream.readLong(); long position = indexStream.readLong();
fieldsStream.seek(position); fieldsStream.seek(position);
@ -237,7 +256,7 @@ final class FieldsReader implements Cloneable {
* contiguous range of length numDocs starting with * contiguous range of length numDocs starting with
* startDocID. Returns the IndexInput (the fieldStream), * startDocID. Returns the IndexInput (the fieldStream),
* already seeked to the starting point for startDocID.*/ * already seeked to the starting point for startDocID.*/
final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException { public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
seekIndex(startDocID); seekIndex(startDocID);
long startOffset = indexStream.readLong(); long startOffset = indexStream.readLong();
long lastOffset = startOffset; long lastOffset = startOffset;

View File

@ -23,10 +23,15 @@ package org.apache.lucene.index;
*/ */
public class IndexFormatTooOldException extends CorruptIndexException { public class IndexFormatTooOldException extends CorruptIndexException {
public IndexFormatTooOldException(String filename, String version) {
super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") +
": " + version + ". This version of Lucene only supports indexes created with release 3.0 and later.");
}
public IndexFormatTooOldException(String filename, int version, int minVersion, int maxVersion) { public IndexFormatTooOldException(String filename, int version, int minVersion, int maxVersion) {
super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") + super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") +
": " + version + " (needs to be between " + minVersion + " and " + maxVersion + ": " + version + " (needs to be between " + minVersion + " and " + maxVersion +
"). This version of Lucene only supports indexes created with release 3.0 and later."); "). This version of Lucene only supports indexes created with release 3.0 and later.");
} }
} }

View File

@ -605,8 +605,6 @@ public class IndexWriter implements Closeable {
} }
} }
/** /**
* Obtain the number of deleted docs for a pooled reader. * Obtain the number of deleted docs for a pooled reader.
* If the reader isn't being pooled, the segmentInfo's * If the reader isn't being pooled, the segmentInfo's
@ -715,11 +713,8 @@ public class IndexWriter implements Closeable {
boolean success = false; boolean success = false;
// TODO: we should check whether this index is too old, // If index is too old, reading the segments will throw
// and throw an IndexFormatTooOldExc up front, here, // IndexFormatTooOldException.
// instead of later when merge, applyDeletes, getReader
// is attempted. I think to do this we should store the
// oldest segment's version in segments_N.
segmentInfos = new SegmentInfos(codecs); segmentInfos = new SegmentInfos(codecs);
try { try {
if (create) { if (create) {
@ -982,6 +977,7 @@ public class IndexWriter implements Closeable {
* @throws CorruptIndexException if the index is corrupt * @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error * @throws IOException if there is a low-level IO error
*/ */
@Override
public void close() throws CorruptIndexException, IOException { public void close() throws CorruptIndexException, IOException {
close(true); close(true);
} }

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Constants;
import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter; import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
@ -87,6 +88,13 @@ public final class SegmentInfo {
private Map<String,String> diagnostics; private Map<String,String> diagnostics;
// Tracks the Lucene version this segment was created with, since 3.1. Null
// indicates an older than 3.0 index, and it's used to detect a too old index.
// The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
// specific versions afterwards ("3.0", "3.1" etc.).
// see Constants.LUCENE_MAIN_VERSION.
private String version;
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors) { boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors) {
this.name = name; this.name = name;
@ -99,6 +107,7 @@ public final class SegmentInfo {
this.segmentCodecs = segmentCodecs; this.segmentCodecs = segmentCodecs;
this.hasVectors = hasVectors; this.hasVectors = hasVectors;
delCount = 0; delCount = 0;
version = Constants.LUCENE_MAIN_VERSION;
} }
/** /**
@ -106,6 +115,7 @@ public final class SegmentInfo {
*/ */
void reset(SegmentInfo src) { void reset(SegmentInfo src) {
clearFiles(); clearFiles();
version = src.version;
name = src.name; name = src.name;
docCount = src.docCount; docCount = src.docCount;
dir = src.dir; dir = src.dir;
@ -145,6 +155,9 @@ public final class SegmentInfo {
*/ */
public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException { public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
this.dir = dir; this.dir = dir;
if (format <= DefaultSegmentInfosWriter.FORMAT_3_1) {
version = input.readString();
}
name = input.readString(); name = input.readString();
docCount = input.readInt(); docCount = input.readInt();
delGen = input.readLong(); delGen = input.readLong();
@ -293,6 +306,7 @@ public final class SegmentInfo {
si.normGen = normGen.clone(); si.normGen = normGen.clone();
} }
si.hasVectors = hasVectors; si.hasVectors = hasVectors;
si.version = version;
return si; return si;
} }
@ -433,6 +447,8 @@ public final class SegmentInfo {
public void write(IndexOutput output) public void write(IndexOutput output)
throws IOException { throws IOException {
assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name; assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
// Write the Lucene version that created this segment, since 3.1
output.writeString(version);
output.writeString(name); output.writeString(name);
output.writeInt(docCount); output.writeInt(docCount);
output.writeLong(delGen); output.writeLong(delGen);
@ -574,8 +590,9 @@ public final class SegmentInfo {
/** Used for debugging. Format may suddenly change. /** Used for debugging. Format may suddenly change.
* *
* <p>Current format looks like * <p>Current format looks like
* <code>_a:c45/4->_1</code>, which means the segment's * <code>_a(3.1):c45/4->_1</code>, which means the segment's
* name is <code>_a</code>; it's using compound file * name is <code>_a</code>; it was created with Lucene 3.1 (or
* '?' if it's unkown); it's using compound file
* format (would be <code>C</code> if not compound); it * format (would be <code>C</code> if not compound); it
* has 45 documents; it has 4 deletions (this part is * has 45 documents; it has 4 deletions (this part is
* left off when there are no deletions); it's using the * left off when there are no deletions); it's using the
@ -585,7 +602,7 @@ public final class SegmentInfo {
public String toString(Directory dir, int pendingDelCount) { public String toString(Directory dir, int pendingDelCount) {
StringBuilder s = new StringBuilder(); StringBuilder s = new StringBuilder();
s.append(name).append(':'); s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':');
char cfs = getUseCompoundFile() ? 'c' : 'C'; char cfs = getUseCompoundFile() ? 'c' : 'C';
s.append(cfs); s.append(cfs);
@ -633,4 +650,25 @@ public final class SegmentInfo {
public int hashCode() { public int hashCode() {
return dir.hashCode() + name.hashCode(); return dir.hashCode() + name.hashCode();
} }
/**
* Used by DefaultSegmentInfosReader to upgrade a 3.0 segment to record its
* version is "3.0". This method can be removed when we're not required to
* support 3x indexes anymore, e.g. in 5.0.
* <p>
* <b>NOTE:</b> this method is used for internal purposes only - you should
* not modify the version of a SegmentInfo, or it may result in unexpected
* exceptions thrown when you attempt to open the index.
*
* @lucene.internal
*/
public void setVersion(String version) {
this.version = version;
}
/** Returns the version of the code which wrote the segment. */
public String getVersion() {
return version;
}
} }

View File

@ -226,6 +226,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
assert storeDir != null; assert storeDir != null;
} }
// nocommit: this can be simplified to always be si.getDocStoreSegment()
final String storesSegment; final String storesSegment;
if (si.getDocStoreOffset() != -1) { if (si.getDocStoreOffset() != -1) {
storesSegment = si.getDocStoreSegment(); storesSegment = si.getDocStoreSegment();

View File

@ -19,7 +19,10 @@ package org.apache.lucene.index.codecs;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.CompoundFileReader;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldsReader;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.IndexFormatTooNewException; import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentInfo;
@ -55,7 +58,41 @@ public class DefaultSegmentInfosReader extends SegmentInfosReader {
infos.counter = input.readInt(); // read counter infos.counter = input.readInt(); // read counter
for (int i = input.readInt(); i > 0; i--) { // read segmentInfos for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
infos.add(new SegmentInfo(directory, format, input, codecs)); SegmentInfo si = new SegmentInfo(directory, format, input, codecs);
if (si.getVersion() == null) {
// Could be a 3.0 - try to open the doc stores - if it fails, it's a
// 2.x segment, and an IndexFormatTooOldException will be thrown,
// which is what we want.
Directory dir = directory;
if (si.getDocStoreOffset() != -1) {
if (si.getDocStoreIsCompoundFile()) {
dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName(
si.getDocStoreSegment(), "",
IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), 1024);
}
} else if (si.getUseCompoundFile()) {
dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName(
si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), 1024);
}
try {
FieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
} finally {
// If we opened the directory, close it
if (dir != directory) dir.close();
}
// Above call succeeded, so it's a 3.0 segment. Upgrade it so the next
// time the segment is read, its version won't be null and we won't
// need to open FieldsReader every time for each such segment.
si.setVersion("3.0");
} else if (si.getVersion().equals("2.x")) {
// If it's a 3x index touched by 3.1+ code, then segments record their
// version, whether they are 2.x ones or not. We detect that and throw
// appropriate exception.
throw new IndexFormatTooOldException(si.name, si.getVersion());
}
infos.add(si);
} }
infos.userData = input.readStringStringMap(); infos.userData = input.readStringStringMap();

View File

@ -38,9 +38,12 @@ public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
/** Each segment records whether it has term vectors */ /** Each segment records whether it has term vectors */
public static final int FORMAT_HAS_VECTORS = -10; public static final int FORMAT_HAS_VECTORS = -10;
/** Each segment records the Lucene version that created it. */
public static final int FORMAT_3_1 = -11;
/** Each segment records whether its postings are written /** Each segment records whether its postings are written
* in the new flex format */ * in the new flex format */
public static final int FORMAT_4_0 = -11; public static final int FORMAT_4_0 = -12;
/** This must always point to the most recent file format. /** This must always point to the most recent file format.
* whenever you add a new format, make it 1 smaller (negative version logic)! */ * whenever you add a new format, make it 1 smaller (negative version logic)! */

View File

@ -70,6 +70,9 @@ public final class Constants {
return s.toString(); return s.toString();
} }
// NOTE: we track per-segment version as a String with the "X.Y" format, e.g.
// "4.0", "3.1", "3.0". Therefore when we change this constant, we should keep
// the format.
public static final String LUCENE_MAIN_VERSION = ident("4.0"); public static final String LUCENE_MAIN_VERSION = ident("4.0");
public static final String LUCENE_VERSION; public static final String LUCENE_VERSION;

View File

@ -171,15 +171,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
try { try {
writer = new IndexWriter(dir, newIndexWriterConfig( writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer()) TEST_VERSION_CURRENT, new MockAnalyzer()));
.setMergeScheduler(new SerialMergeScheduler()) // no threads!
);
// TODO: Make IndexWriter fail on open!
if (random.nextBoolean()) {
writer.optimize();
} else {
reader = writer.getReader();
}
fail("IndexWriter creation should not pass for "+unsupportedNames[i]); fail("IndexWriter creation should not pass for "+unsupportedNames[i]);
} catch (IndexFormatTooOldException e) { } catch (IndexFormatTooOldException e) {
// pass // pass
@ -188,17 +180,13 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
e.printStackTrace(System.out); e.printStackTrace(System.out);
} }
} finally { } finally {
if (reader != null) reader.close(); // we should fail to open IW, and so it should be null when we get here.
reader = null; // However, if the test fails (i.e., IW did not fail on open), we need
// to close IW. However, if merges are run, IW may throw
// IndexFormatTooOldException, and we don't want to mask the fail()
// above, so close without waiting for merges.
if (writer != null) { if (writer != null) {
try { writer.close(false);
writer.close();
} catch (IndexFormatTooOldException e) {
// OK -- since IW gives merge scheduler a chance
// to merge at close, it's possible and fine to
// hit this exc here
writer.close(false);
}
} }
writer = null; writer = null;
} }