LUCENE-9669: Add an expert API to allow opening indices created < N-1 (#2212)

Today we force indices that were created with N-2 and older versions of Lucene
to fail on open. This check doesn't even consider whether the codecs are available. In order
to allow users to open older indices, and for us to support N-2 versions, this change
adds an API on DirectoryReader to specify a minimum index version on a per-reader basis.
This doesn't apply to the IndexWriter, which will still fail when opening older indices.
This commit is contained in:
Simon Willnauer 2021-01-19 09:23:49 +01:00 committed by GitHub
parent 426c902bc9
commit c1ae6dc07c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 153 additions and 14 deletions

View File

@ -62,6 +62,7 @@ import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
@ -87,6 +88,7 @@ import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StandardDirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@ -863,7 +865,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
CheckIndex.Status indexStatus = checker.checkIndex();
assertFalse(indexStatus.clean);
assertTrue(bos.toString(IOUtils.UTF_8).contains(IndexFormatTooOldException.class.getName()));
if (unsupportedNames[i].startsWith("7.")) {
assertTrue(bos.toString(IOUtils.UTF_8).contains("Could not load codec 'Lucene70'"));
} else {
assertTrue(
bos.toString(IOUtils.UTF_8).contains(IndexFormatTooOldException.class.getName()));
}
checker.close();
dir.close();
@ -1986,4 +1993,45 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
bytes.bytes[bytes.length++] = (byte) value;
return bytes;
}
/**
 * For every index listed in {@code oldNames}: opening its first commit with the current major
 * version as the minimum must fail with {@link IndexFormatTooOldException}, while opening the
 * same commit with the previous major version as the minimum must succeed.
 */
public void testFailOpenOldIndex() throws IOException {
  for (String indexName : oldNames) {
    Directory oldIndexDir = oldIndexDirs.get(indexName);
    IndexCommit firstCommit = DirectoryReader.listCommits(oldIndexDir).get(0);
    // fragment of the error message produced when a non-default minimum version is requested
    String expectedFragment =
        "only supports reading from version " + Version.LATEST.major + " upwards.";
    IndexFormatTooOldException tooOld =
        expectThrows(
            IndexFormatTooOldException.class,
            () -> StandardDirectoryReader.open(firstCommit, Version.LATEST.major));
    assertTrue(tooOld.getMessage().contains(expectedFragment));
    // lowering the minimum to the previous major version lets the same commit open
    DirectoryReader reader = StandardDirectoryReader.open(firstCommit, Version.LATEST.major - 1);
    reader.close();
  }
}
/**
 * For each unsupported index whose name starts with the major version just below {@link
 * Version#MIN_SUPPORTED_MAJOR} (i.e. N-2), unzips the index and checks that listing its commits
 * currently fails because the codec cannot be loaded.
 */
public void testReadNMinusTwoCommit() throws IOException {
  // loop-invariant: name prefix of indices created by the N-2 major version, e.g. "7."
  final String nMinusTwoPrefix = (Version.MIN_SUPPORTED_MAJOR - 1) + ".";
  final String expectedMessage =
      "Could not load codec 'Lucene70'. Did you forget to add lucene-backward-codecs.jar?";
  for (String name : this.unsupportedNames) {
    if (name.startsWith(nMinusTwoPrefix) == false) {
      continue;
    }
    Path oldIndexDir = createTempDir(name);
    TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir);
    try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) {
      // don't checkindex, we don't have the codecs yet
      dir.setCheckIndexOnClose(false);
      // The version check no longer rejects the commit up front (listCommits reads with a
      // minimum version of 0), so the failure surfaces when the codec is looked up.
      IllegalArgumentException iae =
          expectThrows(IllegalArgumentException.class, () -> DirectoryReader.listCommits(dir));
      // TODO fix this once we have the codec for 7.0 recreated
      assertEquals(expectedMessage, iae.getMessage());
    }
  }
}
}

View File

@ -506,7 +506,9 @@ public final class CheckIndex implements Closeable {
try {
// Do not use SegmentInfos.read(Directory) since the spooky
// retrying it does is not necessary here (we hold the write lock):
sis = SegmentInfos.readCommit(dir, lastSegmentsFile);
sis =
SegmentInfos.readCommit(
dir, lastSegmentsFile, 0 /* always open old indices if codecs are around */);
} catch (Throwable t) {
if (failFast) {
throw IOUtils.rethrowAlways(t);

View File

@ -104,6 +104,23 @@ public abstract class DirectoryReader extends BaseCompositeReader<LeafReader> {
return StandardDirectoryReader.open(commit.getDirectory(), commit);
}
/**
 * Expert: returns an IndexReader reading the index on the given {@link IndexCommit}. This method
 * makes it possible to open indices that were created with a Lucene version older than N-1,
 * provided that all codecs for this index are available in the classpath and the segment file
 * format used was created with Lucene 7 or newer. Users of this API must be aware that Lucene
 * doesn't guarantee semantic compatibility for indices created with versions older than N-1. All
 * backwards compatibility aside from the file format is optional and applied on a best effort
 * basis.
 *
 * @param commit the commit point to open
 * @param minSupportedMajorVersion the minimum supported major index version; must be between 0
 *     and the current major version, inclusive
 * @throws IllegalArgumentException if {@code minSupportedMajorVersion} is negative or greater
 *     than the current major version
 * @throws IndexFormatTooOldException if the index was created with a major version lower than
 *     {@code minSupportedMajorVersion}
 * @throws IOException if there is a low-level IO error
 */
public static DirectoryReader open(final IndexCommit commit, int minSupportedMajorVersion)
    throws IOException {
  final Directory commitDirectory = commit.getDirectory();
  return StandardDirectoryReader.open(commitDirectory, minSupportedMajorVersion, commit);
}
/**
* If the index has changed since the provided reader was opened, open and return a new reader;
* else, return null. The new reader, if not null, will be the same type of reader as the previous
@ -221,7 +238,7 @@ public abstract class DirectoryReader extends BaseCompositeReader<LeafReader> {
List<IndexCommit> commits = new ArrayList<>();
SegmentInfos latest = SegmentInfos.readLatestCommit(dir);
SegmentInfos latest = SegmentInfos.readLatestCommit(dir, 0);
final long currentGen = latest.getGeneration();
commits.add(new StandardDirectoryReader.ReaderCommit(null, latest, dir));
@ -237,7 +254,7 @@ public abstract class DirectoryReader extends BaseCompositeReader<LeafReader> {
try {
// IOException allowed to throw there, in case
// segments_N is corrupt
sis = SegmentInfos.readCommit(dir, fileName);
sis = SegmentInfos.readCommit(dir, fileName, 0);
} catch (FileNotFoundException | NoSuchFileException fnfe) {
// LUCENE-948: on NFS (and maybe others), if
// you have writers switching back and forth

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.Version;
/** This exception is thrown when Lucene detects an index that is too old for this Lucene version */
public class IndexFormatTooOldException extends IOException {
@ -42,7 +43,9 @@ public class IndexFormatTooOldException extends IOException {
+ resourceDescription
+ "): "
+ reason
+ ". This version of Lucene only supports indexes created with release 8.0 and later.");
+ ". This version of Lucene only supports indexes created with release "
+ Version.MIN_SUPPORTED_MAJOR
+ ".0 and later by default.");
this.resourceDescription = resourceDescription;
this.reason = reason;
this.version = null;
@ -81,7 +84,9 @@ public class IndexFormatTooOldException extends IOException {
+ minVersion
+ " and "
+ maxVersion
+ "). This version of Lucene only supports indexes created with release 8.0 and later.");
+ "). This version of Lucene only supports indexes created with release "
+ Version.MIN_SUPPORTED_MAJOR
+ ".0 and later.");
this.resourceDescription = resourceDescription;
this.version = version;
this.minVersion = minVersion;

View File

@ -1009,6 +1009,14 @@ public class IndexWriter
changed();
} else if (reader != null) {
if (reader.segmentInfos.getIndexCreatedVersionMajor() < Version.MIN_SUPPORTED_MAJOR) {
// second line of defence in the case somebody tries to trick us.
throw new IllegalArgumentException(
"createdVersionMajor must be >= "
+ Version.MIN_SUPPORTED_MAJOR
+ ", got: "
+ reader.segmentInfos.getIndexCreatedVersionMajor());
}
// Init from an existing already opened NRT or non-NRT reader:
if (reader.directory() != commit.getDirectory()) {

View File

@ -170,6 +170,7 @@ public class ParallelLeafReader extends LeafReader {
Version minVersion = Version.LATEST;
for (final LeafReader reader : this.parallelReaders) {
Version leafVersion = reader.getMetaData().getMinVersion();
if (leafVersion == null) {
minVersion = null;
break;

View File

@ -285,12 +285,18 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
*/
public static final SegmentInfos readCommit(Directory directory, String segmentFileName)
    throws IOException {
  // Public callers always read with the default minimum index-created major version.
  final int defaultMinMajor = Version.MIN_SUPPORTED_MAJOR;
  return readCommit(directory, segmentFileName, defaultMinMajor);
}
static final SegmentInfos readCommit(
Directory directory, String segmentFileName, int minSupportedMajorVersion)
throws IOException {
long generation = generationFromSegmentsFileName(segmentFileName);
// System.out.println(Thread.currentThread() + ": SegmentInfos.readCommit " + segmentFileName);
try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
try {
return readCommit(directory, input, generation);
return readCommit(directory, input, generation, minSupportedMajorVersion);
} catch (EOFException | NoSuchFileException | FileNotFoundException e) {
throw new CorruptIndexException(
"Unexpected file read error while reading index.", input, e);
@ -301,6 +307,13 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
/**
 * Read the commit from the provided {@link ChecksumIndexInput}, using {@link
 * Version#MIN_SUPPORTED_MAJOR} as the minimum supported index-created major version.
 */
public static final SegmentInfos readCommit(
    Directory directory, ChecksumIndexInput input, long generation) throws IOException {
  final int defaultMinMajor = Version.MIN_SUPPORTED_MAJOR;
  return readCommit(directory, input, generation, defaultMinMajor);
}
/** Read the commit from the provided {@link ChecksumIndexInput}. */
static final SegmentInfos readCommit(
Directory directory, ChecksumIndexInput input, long generation, int minSupportedMajorVersion)
throws IOException {
Throwable priorE = null;
int format = -1;
try {
@ -329,14 +342,17 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
input);
}
if (indexCreatedVersion < Version.LATEST.major - 1) {
if (indexCreatedVersion < minSupportedMajorVersion) {
throw new IndexFormatTooOldException(
input,
"This index was initially created with Lucene "
+ indexCreatedVersion
+ ".x while the current version is "
+ Version.LATEST
+ " and Lucene only supports reading the current and previous major versions.");
+ " and Lucene only supports reading"
+ (minSupportedMajorVersion == Version.MIN_SUPPORTED_MAJOR
? " the current and previous major versions"
: " from version " + minSupportedMajorVersion + " upwards"));
}
SegmentInfos infos = new SegmentInfos(indexCreatedVersion);
@ -499,7 +515,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
throw new IllegalArgumentException(
"Could not load codec '"
+ name
+ "'. Did you forget to add lucene-backward-codecs.jar?",
+ "'. Did you forget to add lucene-backward-codecs.jar?",
e);
}
throw e;
@ -508,10 +524,15 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
/**
 * Find the latest commit ({@code segments_N file}) and load all {@link SegmentCommitInfo}s, using
 * {@link Version#MIN_SUPPORTED_MAJOR} as the minimum supported index-created major version.
 */
public static final SegmentInfos readLatestCommit(Directory directory) throws IOException {
  final int defaultMinMajor = Version.MIN_SUPPORTED_MAJOR;
  return readLatestCommit(directory, defaultMinMajor);
}
static final SegmentInfos readLatestCommit(Directory directory, int minSupportedMajorVersion)
throws IOException {
return new FindSegmentsFile<SegmentInfos>(directory) {
@Override
protected SegmentInfos doBody(String segmentFileName) throws IOException {
return readCommit(directory, segmentFileName);
return readCommit(directory, segmentFileName, minSupportedMajorVersion);
}
}.run();
}

View File

@ -32,6 +32,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/** Default implementation of {@link DirectoryReader}. */
public final class StandardDirectoryReader extends DirectoryReader {
@ -57,13 +58,27 @@ public final class StandardDirectoryReader extends DirectoryReader {
this.writeAllDeletes = writeAllDeletes;
}
/**
 * Called from DirectoryReader.open(...) methods; opens the commit with {@link
 * Version#MIN_SUPPORTED_MAJOR} as the minimum supported index-created major version.
 */
static DirectoryReader open(final Directory directory, final IndexCommit commit)
    throws IOException {
  final int defaultMinMajor = Version.MIN_SUPPORTED_MAJOR;
  return open(directory, defaultMinMajor, commit);
}
/** called from DirectoryReader.open(...) methods */
static DirectoryReader open(
final Directory directory, int minSupportedMajorVersion, final IndexCommit commit)
throws IOException {
return new SegmentInfos.FindSegmentsFile<DirectoryReader>(directory) {
@Override
protected DirectoryReader doBody(String segmentFileName) throws IOException {
SegmentInfos sis = SegmentInfos.readCommit(directory, segmentFileName);
if (minSupportedMajorVersion > Version.LATEST.major || minSupportedMajorVersion < 0) {
throw new IllegalArgumentException(
"minSupportedMajorVersion must be positive and <= "
+ Version.LATEST.major
+ " but was: "
+ minSupportedMajorVersion);
}
SegmentInfos sis =
SegmentInfos.readCommit(directory, segmentFileName, minSupportedMajorVersion);
final SegmentReader[] readers = new SegmentReader[sis.size()];
boolean success = false;
try {

View File

@ -163,6 +163,12 @@ public final class Version {
*/
@Deprecated public static final Version LUCENE_CURRENT = LATEST;
/**
 * Constant for the minimal supported major version of an index. This version is defined by the
 * version that initially created the index.
 *
 * <p>The value is one less than the major version of {@link #LATEST}, so by default an index must
 * have been created by the current or the previous major release.
 */
public static final int MIN_SUPPORTED_MAJOR = Version.LATEST.major - 1;
/**
* Parse a version number of the form {@code "major.minor.bugfix.prerelease"}.
*

View File

@ -46,6 +46,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
import org.junit.Assume;
@LuceneTestCase.SuppressCodecs("SimpleText")
@ -1096,4 +1097,18 @@ public class TestDirectoryReader extends LuceneTestCase {
assertFalse(DirectoryReader.indexExists(dir));
dir.close();
}
/**
 * Verifies the range check on the {@code minSupportedMajorVersion} argument of {@link
 * DirectoryReader#open(IndexCommit, int)}: values below 0 or above the current major version are
 * rejected with {@link IllegalArgumentException}, while any value in {@code [0,
 * Version.LATEST.major]} opens a freshly written index.
 */
public void testOpenWithInvalidMinCompatVersion() throws IOException {
  try (Directory dir = newDirectory();
      IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig())) {
    Document doc = new Document();
    doc.add(newStringField("field1", "foobar", Field.Store.YES));
    doc.add(newStringField("field2", "foobaz", Field.Store.YES));
    writer.addDocument(doc);
    writer.commit();
    IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
    // below the valid range
    expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(commit, -1));
    // above the valid range: the minimum can never exceed the current major version
    expectThrows(
        IllegalArgumentException.class,
        () -> DirectoryReader.open(commit, Version.LATEST.major + 1));
    // every value in [0, LATEST.major] is accepted for an index created by the current version
    DirectoryReader.open(commit, random().nextInt(Version.LATEST.major + 1)).close();
  }
}
}

View File

@ -88,7 +88,8 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
minVersion = leafVersion;
}
}
metaData = new LeafMetaData(reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor(), minVersion, null);
int createdVersionMajor = reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor();
metaData = new LeafMetaData(createdVersionMajor, minVersion, null);
}
fieldInfos = FieldInfos.getMergedFieldInfos(in);
}