mirror of https://github.com/apache/lucene.git
LUCENE-2720: IndexWriter should throw IndexFormatTooOldExc on open, not later during optimize/getReader/close (trunk)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1062325 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
22f0fe9718
commit
133e70cad6
|
@ -141,6 +141,9 @@ Changes in Runtime Behavior
|
|||
would populate "fake norms" with Similarity.getDefault() for these documents.
|
||||
(Robert Muir, Mike Mccandless)
|
||||
|
||||
* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
|
||||
than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer
|
||||
|
|
|
@ -37,8 +37,10 @@ import java.io.Reader;
|
|||
* Class responsible for access to stored document fields.
|
||||
* <p/>
|
||||
* It uses <segment>.fdt and <segment>.fdx; files.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
final class FieldsReader implements Cloneable {
|
||||
public final class FieldsReader implements Cloneable {
|
||||
private final static int FORMAT_SIZE = 4;
|
||||
|
||||
private final FieldInfos fieldInfos;
|
||||
|
@ -75,6 +77,23 @@ final class FieldsReader implements Cloneable {
|
|||
return new FieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
|
||||
}
|
||||
|
||||
/** Verifies that the code version which wrote the segment is supported. */
|
||||
public static void checkCodeVersion(Directory dir, String segment) throws IOException {
|
||||
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_INDEX_EXTENSION);
|
||||
IndexInput idxStream = dir.openInput(indexStreamFN, 1024);
|
||||
|
||||
try {
|
||||
int format = idxStream.readInt();
|
||||
if (format < FieldsWriter.FORMAT_MINIMUM)
|
||||
throw new IndexFormatTooOldException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
|
||||
if (format > FieldsWriter.FORMAT_CURRENT)
|
||||
throw new IndexFormatTooNewException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
|
||||
} finally {
|
||||
idxStream.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Used only by clone
|
||||
private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
|
||||
IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) {
|
||||
|
@ -89,11 +108,11 @@ final class FieldsReader implements Cloneable {
|
|||
indexStream = (IndexInput) cloneableIndexStream.clone();
|
||||
}
|
||||
|
||||
FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
|
||||
public FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
|
||||
this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0);
|
||||
}
|
||||
|
||||
FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
|
||||
public FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
|
||||
boolean success = false;
|
||||
isOriginal = true;
|
||||
try {
|
||||
|
@ -157,7 +176,7 @@ final class FieldsReader implements Cloneable {
|
|||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
final void close() throws IOException {
|
||||
public final void close() throws IOException {
|
||||
if (!closed) {
|
||||
if (fieldsStream != null) {
|
||||
fieldsStream.close();
|
||||
|
@ -178,7 +197,7 @@ final class FieldsReader implements Cloneable {
|
|||
}
|
||||
}
|
||||
|
||||
final int size() {
|
||||
public final int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
|
@ -186,7 +205,7 @@ final class FieldsReader implements Cloneable {
|
|||
indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
|
||||
}
|
||||
|
||||
final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
|
||||
public final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
|
||||
seekIndex(n);
|
||||
long position = indexStream.readLong();
|
||||
fieldsStream.seek(position);
|
||||
|
@ -237,7 +256,7 @@ final class FieldsReader implements Cloneable {
|
|||
* contiguous range of length numDocs starting with
|
||||
* startDocID. Returns the IndexInput (the fieldStream),
|
||||
* already seeked to the starting point for startDocID.*/
|
||||
final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
|
||||
public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
|
||||
seekIndex(startDocID);
|
||||
long startOffset = indexStream.readLong();
|
||||
long lastOffset = startOffset;
|
||||
|
|
|
@ -23,6 +23,11 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
public class IndexFormatTooOldException extends CorruptIndexException {
|
||||
|
||||
public IndexFormatTooOldException(String filename, String version) {
|
||||
super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") +
|
||||
": " + version + ". This version of Lucene only supports indexes created with release 3.0 and later.");
|
||||
}
|
||||
|
||||
public IndexFormatTooOldException(String filename, int version, int minVersion, int maxVersion) {
|
||||
super("Format version is not supported" + (filename!=null ? (" in file '" + filename + "'") : "") +
|
||||
": " + version + " (needs to be between " + minVersion + " and " + maxVersion +
|
||||
|
|
|
@ -605,8 +605,6 @@ public class IndexWriter implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Obtain the number of deleted docs for a pooled reader.
|
||||
* If the reader isn't being pooled, the segmentInfo's
|
||||
|
@ -715,11 +713,8 @@ public class IndexWriter implements Closeable {
|
|||
|
||||
boolean success = false;
|
||||
|
||||
// TODO: we should check whether this index is too old,
|
||||
// and throw an IndexFormatTooOldExc up front, here,
|
||||
// instead of later when merge, applyDeletes, getReader
|
||||
// is attempted. I think to do this we should store the
|
||||
// oldest segment's version in segments_N.
|
||||
// If index is too old, reading the segments will throw
|
||||
// IndexFormatTooOldException.
|
||||
segmentInfos = new SegmentInfos(codecs);
|
||||
try {
|
||||
if (create) {
|
||||
|
@ -982,6 +977,7 @@ public class IndexWriter implements Closeable {
|
|||
* @throws CorruptIndexException if the index is corrupt
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
@Override
|
||||
public void close() throws CorruptIndexException, IOException {
|
||||
close(true);
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.index.codecs.Codec;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
|
||||
|
@ -87,6 +88,13 @@ public final class SegmentInfo {
|
|||
|
||||
private Map<String,String> diagnostics;
|
||||
|
||||
// Tracks the Lucene version this segment was created with, since 3.1. Null
|
||||
// indicates an older than 3.0 index, and it's used to detect a too old index.
|
||||
// The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
|
||||
// specific versions afterwards ("3.0", "3.1" etc.).
|
||||
// see Constants.LUCENE_MAIN_VERSION.
|
||||
private String version;
|
||||
|
||||
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
|
||||
boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors) {
|
||||
this.name = name;
|
||||
|
@ -99,6 +107,7 @@ public final class SegmentInfo {
|
|||
this.segmentCodecs = segmentCodecs;
|
||||
this.hasVectors = hasVectors;
|
||||
delCount = 0;
|
||||
version = Constants.LUCENE_MAIN_VERSION;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -106,6 +115,7 @@ public final class SegmentInfo {
|
|||
*/
|
||||
void reset(SegmentInfo src) {
|
||||
clearFiles();
|
||||
version = src.version;
|
||||
name = src.name;
|
||||
docCount = src.docCount;
|
||||
dir = src.dir;
|
||||
|
@ -145,6 +155,9 @@ public final class SegmentInfo {
|
|||
*/
|
||||
public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
|
||||
this.dir = dir;
|
||||
if (format <= DefaultSegmentInfosWriter.FORMAT_3_1) {
|
||||
version = input.readString();
|
||||
}
|
||||
name = input.readString();
|
||||
docCount = input.readInt();
|
||||
delGen = input.readLong();
|
||||
|
@ -293,6 +306,7 @@ public final class SegmentInfo {
|
|||
si.normGen = normGen.clone();
|
||||
}
|
||||
si.hasVectors = hasVectors;
|
||||
si.version = version;
|
||||
return si;
|
||||
}
|
||||
|
||||
|
@ -433,6 +447,8 @@ public final class SegmentInfo {
|
|||
public void write(IndexOutput output)
|
||||
throws IOException {
|
||||
assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
|
||||
// Write the Lucene version that created this segment, since 3.1
|
||||
output.writeString(version);
|
||||
output.writeString(name);
|
||||
output.writeInt(docCount);
|
||||
output.writeLong(delGen);
|
||||
|
@ -574,8 +590,9 @@ public final class SegmentInfo {
|
|||
/** Used for debugging. Format may suddenly change.
|
||||
*
|
||||
* <p>Current format looks like
|
||||
* <code>_a:c45/4->_1</code>, which means the segment's
|
||||
* name is <code>_a</code>; it's using compound file
|
||||
* <code>_a(3.1):c45/4->_1</code>, which means the segment's
|
||||
* name is <code>_a</code>; it was created with Lucene 3.1 (or
|
||||
* '?' if it's unkown); it's using compound file
|
||||
* format (would be <code>C</code> if not compound); it
|
||||
* has 45 documents; it has 4 deletions (this part is
|
||||
* left off when there are no deletions); it's using the
|
||||
|
@ -585,7 +602,7 @@ public final class SegmentInfo {
|
|||
public String toString(Directory dir, int pendingDelCount) {
|
||||
|
||||
StringBuilder s = new StringBuilder();
|
||||
s.append(name).append(':');
|
||||
s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':');
|
||||
|
||||
char cfs = getUseCompoundFile() ? 'c' : 'C';
|
||||
s.append(cfs);
|
||||
|
@ -633,4 +650,25 @@ public final class SegmentInfo {
|
|||
public int hashCode() {
|
||||
return dir.hashCode() + name.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Used by DefaultSegmentInfosReader to upgrade a 3.0 segment to record its
|
||||
* version is "3.0". This method can be removed when we're not required to
|
||||
* support 3x indexes anymore, e.g. in 5.0.
|
||||
* <p>
|
||||
* <b>NOTE:</b> this method is used for internal purposes only - you should
|
||||
* not modify the version of a SegmentInfo, or it may result in unexpected
|
||||
* exceptions thrown when you attempt to open the index.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public void setVersion(String version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
/** Returns the version of the code which wrote the segment. */
|
||||
public String getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -226,6 +226,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
|
|||
assert storeDir != null;
|
||||
}
|
||||
|
||||
// nocommit: this can be simplified to always be si.getDocStoreSegment()
|
||||
final String storesSegment;
|
||||
if (si.getDocStoreOffset() != -1) {
|
||||
storesSegment = si.getDocStoreSegment();
|
||||
|
|
|
@ -19,7 +19,10 @@ package org.apache.lucene.index.codecs;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.CompoundFileReader;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.FieldsReader;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexFormatTooOldException;
|
||||
import org.apache.lucene.index.IndexFormatTooNewException;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
|
@ -55,7 +58,41 @@ public class DefaultSegmentInfosReader extends SegmentInfosReader {
|
|||
infos.counter = input.readInt(); // read counter
|
||||
|
||||
for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
|
||||
infos.add(new SegmentInfo(directory, format, input, codecs));
|
||||
SegmentInfo si = new SegmentInfo(directory, format, input, codecs);
|
||||
if (si.getVersion() == null) {
|
||||
// Could be a 3.0 - try to open the doc stores - if it fails, it's a
|
||||
// 2.x segment, and an IndexFormatTooOldException will be thrown,
|
||||
// which is what we want.
|
||||
Directory dir = directory;
|
||||
if (si.getDocStoreOffset() != -1) {
|
||||
if (si.getDocStoreIsCompoundFile()) {
|
||||
dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName(
|
||||
si.getDocStoreSegment(), "",
|
||||
IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), 1024);
|
||||
}
|
||||
} else if (si.getUseCompoundFile()) {
|
||||
dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName(
|
||||
si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), 1024);
|
||||
}
|
||||
|
||||
try {
|
||||
FieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
|
||||
} finally {
|
||||
// If we opened the directory, close it
|
||||
if (dir != directory) dir.close();
|
||||
}
|
||||
|
||||
// Above call succeeded, so it's a 3.0 segment. Upgrade it so the next
|
||||
// time the segment is read, its version won't be null and we won't
|
||||
// need to open FieldsReader every time for each such segment.
|
||||
si.setVersion("3.0");
|
||||
} else if (si.getVersion().equals("2.x")) {
|
||||
// If it's a 3x index touched by 3.1+ code, then segments record their
|
||||
// version, whether they are 2.x ones or not. We detect that and throw
|
||||
// appropriate exception.
|
||||
throw new IndexFormatTooOldException(si.name, si.getVersion());
|
||||
}
|
||||
infos.add(si);
|
||||
}
|
||||
|
||||
infos.userData = input.readStringStringMap();
|
||||
|
|
|
@ -38,9 +38,12 @@ public class DefaultSegmentInfosWriter extends SegmentInfosWriter {
|
|||
/** Each segment records whether it has term vectors */
|
||||
public static final int FORMAT_HAS_VECTORS = -10;
|
||||
|
||||
/** Each segment records the Lucene version that created it. */
|
||||
public static final int FORMAT_3_1 = -11;
|
||||
|
||||
/** Each segment records whether its postings are written
|
||||
* in the new flex format */
|
||||
public static final int FORMAT_4_0 = -11;
|
||||
public static final int FORMAT_4_0 = -12;
|
||||
|
||||
/** This must always point to the most recent file format.
|
||||
* whenever you add a new format, make it 1 smaller (negative version logic)! */
|
||||
|
|
|
@ -70,6 +70,9 @@ public final class Constants {
|
|||
return s.toString();
|
||||
}
|
||||
|
||||
// NOTE: we track per-segment version as a String with the "X.Y" format, e.g.
|
||||
// "4.0", "3.1", "3.0". Therefore when we change this constant, we should keep
|
||||
// the format.
|
||||
public static final String LUCENE_MAIN_VERSION = ident("4.0");
|
||||
|
||||
public static final String LUCENE_VERSION;
|
||||
|
|
|
@ -171,15 +171,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
|
||||
try {
|
||||
writer = new IndexWriter(dir, newIndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, new MockAnalyzer())
|
||||
.setMergeScheduler(new SerialMergeScheduler()) // no threads!
|
||||
);
|
||||
// TODO: Make IndexWriter fail on open!
|
||||
if (random.nextBoolean()) {
|
||||
writer.optimize();
|
||||
} else {
|
||||
reader = writer.getReader();
|
||||
}
|
||||
TEST_VERSION_CURRENT, new MockAnalyzer()));
|
||||
fail("IndexWriter creation should not pass for "+unsupportedNames[i]);
|
||||
} catch (IndexFormatTooOldException e) {
|
||||
// pass
|
||||
|
@ -188,18 +180,14 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
e.printStackTrace(System.out);
|
||||
}
|
||||
} finally {
|
||||
if (reader != null) reader.close();
|
||||
reader = null;
|
||||
// we should fail to open IW, and so it should be null when we get here.
|
||||
// However, if the test fails (i.e., IW did not fail on open), we need
|
||||
// to close IW. However, if merges are run, IW may throw
|
||||
// IndexFormatTooOldException, and we don't want to mask the fail()
|
||||
// above, so close without waiting for merges.
|
||||
if (writer != null) {
|
||||
try {
|
||||
writer.close();
|
||||
} catch (IndexFormatTooOldException e) {
|
||||
// OK -- since IW gives merge scheduler a chance
|
||||
// to merge at close, it's possible and fine to
|
||||
// hit this exc here
|
||||
writer.close(false);
|
||||
}
|
||||
}
|
||||
writer = null;
|
||||
}
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue