From c1ae6dc07c9a988533cbe7176bdeb49e2fca1d9c Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 19 Jan 2021 09:23:49 +0100 Subject: [PATCH] LUCENE-9669: Add an expert API to allow opening indices created < N-1 (#2212) Today we force indices that were created with N-2 and older versions of Lucene to fail on open. This check doesn't even check if the codecs are available. In order to allow users to open older indices and for us to support N-2 versions this change adds an API on DirectoryReader to specify a minimum index version on a per reader basis. This doesn't apply for the IndexWriter which will fail on opening older indices. --- .../TestBackwardsCompatibility.java | 50 ++++++++++++++++++- .../org/apache/lucene/index/CheckIndex.java | 4 +- .../apache/lucene/index/DirectoryReader.java | 21 +++++++- .../index/IndexFormatTooOldException.java | 9 +++- .../org/apache/lucene/index/IndexWriter.java | 8 +++ .../lucene/index/ParallelLeafReader.java | 1 + .../org/apache/lucene/index/SegmentInfos.java | 31 ++++++++++-- .../lucene/index/StandardDirectoryReader.java | 19 ++++++- .../java/org/apache/lucene/util/Version.java | 6 +++ .../lucene/index/TestDirectoryReader.java | 15 ++++++ .../index/SlowCompositeReaderWrapper.java | 3 +- 11 files changed, 153 insertions(+), 14 deletions(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java index 498039769e3..ea155622e2a 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java @@ -62,6 +62,7 @@ import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexCommit; import 
org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; @@ -87,6 +88,7 @@ import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.StandardDirectoryReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -863,7 +865,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase { checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8)); CheckIndex.Status indexStatus = checker.checkIndex(); assertFalse(indexStatus.clean); - assertTrue(bos.toString(IOUtils.UTF_8).contains(IndexFormatTooOldException.class.getName())); + if (unsupportedNames[i].startsWith("7.")) { + assertTrue(bos.toString(IOUtils.UTF_8).contains("Could not load codec 'Lucene70'")); + } else { + assertTrue( + bos.toString(IOUtils.UTF_8).contains(IndexFormatTooOldException.class.getName())); + } checker.close(); dir.close(); @@ -1986,4 +1993,45 @@ public class TestBackwardsCompatibility extends LuceneTestCase { bytes.bytes[bytes.length++] = (byte) value; return bytes; } + + public void testFailOpenOldIndex() throws IOException { + for (String name : oldNames) { + Directory directory = oldIndexDirs.get(name); + IndexCommit commit = DirectoryReader.listCommits(directory).get(0); + IndexFormatTooOldException ex = + expectThrows( + IndexFormatTooOldException.class, + () -> StandardDirectoryReader.open(commit, Version.LATEST.major)); + assertTrue( + ex.getMessage() + .contains( + "only supports reading from version " + Version.LATEST.major + " upwards.")); + // now open with allowed min version + StandardDirectoryReader.open(commit, Version.LATEST.major - 1).close(); + } + } + + public void testReadNMinusTwoCommit() throws IOException { + for (String name : 
this.unsupportedNames) { + if (name.startsWith(Version.MIN_SUPPORTED_MAJOR - 1 + ".")) { + Path oldIndexDir = createTempDir(name); + TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir); + try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) { + // don't checkindex, we don't have the codecs yet + dir.setCheckIndexOnClose(false); + IllegalArgumentException iae = + expectThrows(IllegalArgumentException.class, () -> DirectoryReader.listCommits(dir)); + // TODO fix this once we have the codec for 7.0 recreated + assertEquals( + "Could not load codec 'Lucene70'. Did you forget to add lucene-backward-codecs.jar?", + iae.getMessage()); + IllegalArgumentException ex = + expectThrows(IllegalArgumentException.class, () -> DirectoryReader.listCommits(dir)); + assertEquals( + "Could not load codec 'Lucene70'. Did you forget to add lucene-backward-codecs.jar?", + ex.getMessage()); + } + } + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 5292cca4f93..393135d47fd 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -506,7 +506,9 @@ public final class CheckIndex implements Closeable { try { // Do not use SegmentInfos.read(Directory) since the spooky // retrying it does is not necessary here (we hold the write lock): - sis = SegmentInfos.readCommit(dir, lastSegmentsFile); + sis = + SegmentInfos.readCommit( + dir, lastSegmentsFile, 0 /* always open old indices if codecs are around */); } catch (Throwable t) { if (failFast) { throw IOUtils.rethrowAlways(t); diff --git a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java index 2207740d427..20f1e2b674a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java +++ 
b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java @@ -104,6 +104,23 @@ public abstract class DirectoryReader extends BaseCompositeReader { return StandardDirectoryReader.open(commit.getDirectory(), commit); } + /** + * Expert: returns an IndexReader reading the index on the given {@link IndexCommit}. This method + * allows opening indices that were created with a Lucene version older than N-1 provided that all + * codecs for this index are available in the classpath and the segment file format used was + * created with Lucene 7 or newer. Users of this API must be aware that Lucene doesn't guarantee + * semantic compatibility for indices created with versions older than N-1. All backwards + * compatibility aside from the file format is optional and applied on a best effort basis. + * + * @param commit the commit point to open + * @param minSupportedMajorVersion the minimum supported major index version + * @throws IOException if there is a low-level IO error + */ + public static DirectoryReader open(final IndexCommit commit, int minSupportedMajorVersion) + throws IOException { + return StandardDirectoryReader.open(commit.getDirectory(), minSupportedMajorVersion, commit); + } + /** * If the index has changed since the provided reader was opened, open and return a new reader; * else, return null. 
The new reader, if not null, will be the same type of reader as the previous @@ -221,7 +238,7 @@ public abstract class DirectoryReader extends BaseCompositeReader { List commits = new ArrayList<>(); - SegmentInfos latest = SegmentInfos.readLatestCommit(dir); + SegmentInfos latest = SegmentInfos.readLatestCommit(dir, 0); final long currentGen = latest.getGeneration(); commits.add(new StandardDirectoryReader.ReaderCommit(null, latest, dir)); @@ -237,7 +254,7 @@ public abstract class DirectoryReader extends BaseCompositeReader { try { // IOException allowed to throw there, in case // segments_N is corrupt - sis = SegmentInfos.readCommit(dir, fileName); + sis = SegmentInfos.readCommit(dir, fileName, 0); } catch (FileNotFoundException | NoSuchFileException fnfe) { // LUCENE-948: on NFS (and maybe others), if // you have writers switching back and forth diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexFormatTooOldException.java b/lucene/core/src/java/org/apache/lucene/index/IndexFormatTooOldException.java index 737069932e1..f2ac3714c17 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexFormatTooOldException.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexFormatTooOldException.java @@ -19,6 +19,7 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.Objects; import org.apache.lucene.store.DataInput; +import org.apache.lucene.util.Version; /** This exception is thrown when Lucene detects an index that is too old for this Lucene version */ public class IndexFormatTooOldException extends IOException { @@ -42,7 +43,9 @@ public class IndexFormatTooOldException extends IOException { + resourceDescription + "): " + reason - + ". This version of Lucene only supports indexes created with release 8.0 and later."); + + ". 
This version of Lucene only supports indexes created with release " + + Version.MIN_SUPPORTED_MAJOR + + ".0 and later by default."); this.resourceDescription = resourceDescription; this.reason = reason; this.version = null; @@ -81,7 +84,9 @@ public class IndexFormatTooOldException extends IOException { + minVersion + " and " + maxVersion - + "). This version of Lucene only supports indexes created with release 8.0 and later."); + + "). This version of Lucene only supports indexes created with release " + + Version.MIN_SUPPORTED_MAJOR + + ".0 and later."); this.resourceDescription = resourceDescription; this.version = version; this.minVersion = minVersion; diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 8cbd9bdd1c4..3e5da9fa67e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -1009,6 +1009,14 @@ public class IndexWriter changed(); } else if (reader != null) { + if (reader.segmentInfos.getIndexCreatedVersionMajor() < Version.MIN_SUPPORTED_MAJOR) { + // second line of defence in the case somebody tries to trick us. 
+ throw new IllegalArgumentException( + "createdVersionMajor must be >= " + + Version.MIN_SUPPORTED_MAJOR + + ", got: " + + reader.segmentInfos.getIndexCreatedVersionMajor()); + } // Init from an existing already opened NRT or non-NRT reader: if (reader.directory() != commit.getDirectory()) { diff --git a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java index 0d0ac707b07..1316c3bdc18 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java @@ -170,6 +170,7 @@ public class ParallelLeafReader extends LeafReader { Version minVersion = Version.LATEST; for (final LeafReader reader : this.parallelReaders) { Version leafVersion = reader.getMetaData().getMinVersion(); + if (leafVersion == null) { minVersion = null; break; diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index a2a089fafad..7e7d7cb10c5 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -285,12 +285,18 @@ public final class SegmentInfos implements Cloneable, Iterable(directory) { @Override protected SegmentInfos doBody(String segmentFileName) throws IOException { - return readCommit(directory, segmentFileName); + return readCommit(directory, segmentFileName, minSupportedMajorVersion); } }.run(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java index 23f85e7340b..bd9e199e094 100644 --- a/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java @@ -32,6 +32,7 @@ import org.apache.lucene.store.Directory; import 
org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.Version; /** Default implementation of {@link DirectoryReader}. */ public final class StandardDirectoryReader extends DirectoryReader { @@ -57,13 +58,27 @@ public final class StandardDirectoryReader extends DirectoryReader { this.writeAllDeletes = writeAllDeletes; } - /** called from DirectoryReader.open(...) methods */ static DirectoryReader open(final Directory directory, final IndexCommit commit) throws IOException { + return open(directory, Version.MIN_SUPPORTED_MAJOR, commit); + } + + /** called from DirectoryReader.open(...) methods */ + static DirectoryReader open( + final Directory directory, int minSupportedMajorVersion, final IndexCommit commit) + throws IOException { return new SegmentInfos.FindSegmentsFile(directory) { @Override protected DirectoryReader doBody(String segmentFileName) throws IOException { - SegmentInfos sis = SegmentInfos.readCommit(directory, segmentFileName); + if (minSupportedMajorVersion > Version.LATEST.major || minSupportedMajorVersion < 0) { + throw new IllegalArgumentException( + "minSupportedMajorVersion must be positive and <= " + + Version.LATEST.major + + " but was: " + + minSupportedMajorVersion); + } + SegmentInfos sis = + SegmentInfos.readCommit(directory, segmentFileName, minSupportedMajorVersion); final SegmentReader[] readers = new SegmentReader[sis.size()]; boolean success = false; try { diff --git a/lucene/core/src/java/org/apache/lucene/util/Version.java b/lucene/core/src/java/org/apache/lucene/util/Version.java index afc8a1b1fd6..f1be05e2c73 100644 --- a/lucene/core/src/java/org/apache/lucene/util/Version.java +++ b/lucene/core/src/java/org/apache/lucene/util/Version.java @@ -163,6 +163,12 @@ public final class Version { */ @Deprecated public static final Version LUCENE_CURRENT = LATEST; + /** + * Constant for the minimal supported major version of an index. 
This version is defined by the + * version that initially created the index. + */ + public static final int MIN_SUPPORTED_MAJOR = Version.LATEST.major - 1; + /** * Parse a version number of the form {@code "major.minor.bugfix.prerelease"}. * diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java index 4049de3cb3e..1b74790c01d 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java @@ -46,6 +46,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; +import org.apache.lucene.util.Version; import org.junit.Assume; @LuceneTestCase.SuppressCodecs("SimpleText") @@ -1096,4 +1097,18 @@ public class TestDirectoryReader extends LuceneTestCase { assertFalse(DirectoryReader.indexExists(dir)); dir.close(); } + + public void testOpenWithInvalidMinCompatVersion() throws IOException { + try (Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(newStringField("field1", "foobar", Field.Store.YES)); + doc.add(newStringField("field2", "foobaz", Field.Store.YES)); + writer.addDocument(doc); + writer.commit(); + IndexCommit commit = DirectoryReader.listCommits(dir).get(0); + expectThrows(IllegalArgumentException.class, () -> DirectoryReader.open(commit, -1)); + DirectoryReader.open(commit, random().nextInt(Version.LATEST.major + 1)).close(); + } + } } diff --git a/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java b/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java index 33c23a17ddd..fce1bc5fc8e 100644 --- a/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java +++ 
b/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java @@ -88,7 +88,8 @@ public final class SlowCompositeReaderWrapper extends LeafReader { minVersion = leafVersion; } } - metaData = new LeafMetaData(reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor(), minVersion, null); + int createdVersionMajor = reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor(); + metaData = new LeafMetaData(createdVersionMajor, minVersion, null); } fieldInfos = FieldInfos.getMergedFieldInfos(in); }