LUCENE-7703: Record the index creation version.

This commit is contained in:
Adrien Grand 2017-02-22 16:11:52 +01:00
parent b6c5a8a0c1
commit d9c0f2599d
11 changed files with 203 additions and 40 deletions

View File

@ -5,6 +5,12 @@ http://s.apache.org/luceneversions
======================= Lucene 7.0.0 =======================
New Features
* LUCENE-7703: SegmentInfos now record the Lucene version at index creation
time. (Adrien Grand)
API Changes
* LUCENE-2605: Classic QueryParser no longer splits on whitespace by default.

View File

@ -702,13 +702,27 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
if (VERBOSE) {
System.out.println("\nTEST: old index " + name);
}
Directory oldDir = oldIndexDirs.get(name);
Version indexCreatedVersion = SegmentInfos.readLatestCommit(oldDir).getIndexCreatedVersion();
Directory targetDir = newDirectory();
// Simulate writing into an index that was created on the same version
new SegmentInfos(indexCreatedVersion).commit(targetDir);
IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(new MockAnalyzer(random())));
w.addIndexes(oldIndexDirs.get(name));
w.addIndexes(oldDir);
w.close();
targetDir.close();
// Now check that we forbid calling addIndexes with a different version
targetDir = newDirectory();
IndexWriter oldWriter = new IndexWriter(targetDir, newIndexWriterConfig(new MockAnalyzer(random())));
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> oldWriter.addIndexes(oldDir));
assertTrue(e.getMessage(), e.getMessage().startsWith("Cannot use addIndexes(Directory) with indexes that have been created by a different Lucene version."));
if (VERBOSE) {
System.out.println("\nTEST: done adding indices; now close");
}
w.close();
oldWriter.close();
targetDir.close();
}
@ -1221,6 +1235,20 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
}
/**
 * Checks the creation version recorded in the latest commit of every old index.
 * Each of these indexes was written by a single Lucene version, so for commits
 * written on or after 7.0 the creation version must equal the commit version;
 * for older commits no creation version is expected.
 */
public void testIndexCreatedVersion() throws IOException {
  for (String indexName : oldNames) {
    final SegmentInfos latestCommit = SegmentInfos.readLatestCommit(oldIndexDirs.get(indexName));
    final Version commitVersion = latestCommit.getCommitLuceneVersion();
    final Version createdVersion = latestCommit.getIndexCreatedVersion();
    if (commitVersion.onOrAfter(Version.LUCENE_7_0_0) == false) {
      // committed before 7.0: the creation version was never recorded
      assertNull(createdVersion);
      continue;
    }
    // single-version index: commit version and creation version must agree
    assertEquals(commitVersion, createdVersion);
  }
}
public void verifyUsesDefaultCodec(Directory dir, String name) throws Exception {
DirectoryReader r = DirectoryReader.open(dir);
for (LeafReaderContext context : r.leaves()) {
@ -1284,7 +1312,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
}
private int checkAllSegmentsUpgraded(Directory dir) throws IOException {
private int checkAllSegmentsUpgraded(Directory dir, Version indexCreatedVersion) throws IOException {
final SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
if (VERBOSE) {
System.out.println("checkAllSegmentsUpgraded: " + infos);
@ -1293,6 +1321,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertEquals(Version.LATEST, si.info.getVersion());
}
assertEquals(Version.LATEST, infos.getCommitLuceneVersion());
assertEquals(indexCreatedVersion, infos.getIndexCreatedVersion());
return infos.size();
}
@ -1310,10 +1339,11 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
System.out.println("testUpgradeOldIndex: index=" +name);
}
Directory dir = newDirectory(oldIndexDirs.get(name));
Version indexCreatedVersion = SegmentInfos.readLatestCommit(dir).getIndexCreatedVersion();
newIndexUpgrader(dir).upgrade();
checkAllSegmentsUpgraded(dir);
checkAllSegmentsUpgraded(dir, indexCreatedVersion);
dir.close();
}
@ -1324,7 +1354,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
PrintStream savedSystemOut = System.out;
System.setOut(new PrintStream(new ByteArrayOutputStream(), false, "UTF-8"));
try {
for (String name : oldIndexDirs.keySet()) {
for (Map.Entry<String,Directory> entry : oldIndexDirs.entrySet()) {
String name = entry.getKey();
Version indexCreatedVersion = SegmentInfos.readLatestCommit(entry.getValue()).getIndexCreatedVersion();
Path dir = createTempDir(name);
TestUtil.unzip(getDataInputStream("index." + name + ".zip"), dir);
@ -1360,7 +1392,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
Directory upgradedDir = newFSDirectory(dir);
try {
checkAllSegmentsUpgraded(upgradedDir);
checkAllSegmentsUpgraded(upgradedDir, indexCreatedVersion);
} finally {
upgradedDir.close();
}
@ -1377,6 +1409,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
Directory dir = newDirectory(oldIndexDirs.get(name));
assertEquals("Original index must be single segment", 1, getNumberOfSegments(dir));
Version indexCreatedVersion = SegmentInfos.readLatestCommit(dir).getIndexCreatedVersion();
// create a bunch of dummy segments
int id = 40;
@ -1418,7 +1451,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertEquals(1, DirectoryReader.listCommits(dir).size());
newIndexUpgrader(dir).upgrade();
final int segCount = checkAllSegmentsUpgraded(dir);
final int segCount = checkAllSegmentsUpgraded(dir, indexCreatedVersion);
assertEquals("Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged",
origSegCount, segCount);
@ -1435,7 +1468,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
newIndexUpgrader(dir).upgrade();
checkAllSegmentsUpgraded(dir);
checkAllSegmentsUpgraded(dir, null);
dir.close();
}

View File

@ -78,10 +78,11 @@ public class TestFixBrokenOffsets extends LuceneTestCase {
MockDirectoryWrapper tmpDir = newMockDirectory();
tmpDir.setCheckIndexOnClose(false);
IndexWriter w = new IndexWriter(tmpDir, new IndexWriterConfig());
w.addIndexes(dir);
IndexWriter finalW = w;
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> finalW.addIndexes(dir));
assertTrue(e.getMessage(), e.getMessage().startsWith("Cannot use addIndexes(Directory) with indexes that have been created by a different Lucene version."));
w.close();
// OK: addIndexes(Directory...) also keeps version as 6.3.0, so offsets not checked:
TestUtil.checkIndex(tmpDir);
// OK: addIndexes(Directory...) refuses to execute if the index creation version is different so broken offsets are not carried over
tmpDir.close();
final MockDirectoryWrapper tmpDir2 = newMockDirectory();

View File

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
/**
 * Checks which index creation version ends up in the commit when an
 * {@link IndexWriter} is opened on top of the bundled
 * {@code index.single-empty-doc.630.zip} index.
 */
public class TestIndexWriterOnOldIndex extends LuceneTestCase {

  /**
   * Opens a writer on a copy of the old index once per {@link OpenMode} and
   * commits. With {@link OpenMode#CREATE} the new commit must record
   * {@link Version#LATEST} as its creation version; with every other mode the
   * creation version must stay {@code null}, as it was before the writer ran.
   *
   * @throws IOException if unzipping, reading or writing the index fails
   */
  public void testOpenModeAndCreatedVersion() throws IOException {
    InputStream resource = getClass().getResourceAsStream("index.single-empty-doc.630.zip");
    assertNotNull(resource);
    Path path = createTempDir();
    TestUtil.unzip(resource, path);
    // try-with-resources so that a failing assertion does not leak the directories or the writer
    try (Directory dir = newFSDirectory(path)) {
      for (OpenMode openMode : OpenMode.values()) {
        try (Directory tmpDir = newDirectory(dir)) {
          // 6.3.0 index: no creation version recorded
          assertNull(SegmentInfos.readLatestCommit(tmpDir).getIndexCreatedVersion());
          try (IndexWriter w = new IndexWriter(tmpDir, newIndexWriterConfig().setOpenMode(openMode))) {
            w.commit();
          }
          Version createdVersion = SegmentInfos.readLatestCommit(tmpDir).getIndexCreatedVersion();
          if (openMode == OpenMode.CREATE) {
            // CREATE starts a brand new index, so the current version is recorded
            assertEquals(Version.LATEST, createdVersion);
          } else {
            // the existing 6.3.0 index is kept: still no creation version
            assertNull(createdVersion);
          }
        }
      }
    }
  }
}

View File

@ -30,6 +30,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
@ -864,14 +865,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// against an index that's currently open for
// searching. In this case we write the next
// segments_N file with no segments:
SegmentInfos sis = null;
final SegmentInfos sis = new SegmentInfos(Version.LATEST);
try {
sis = SegmentInfos.readLatestCommit(directory);
sis.clear();
final SegmentInfos previous = SegmentInfos.readLatestCommit(directory);
sis.updateGenerationVersionAndCounter(previous);
} catch (IOException e) {
// Likely this means it's a fresh directory
initialIndexExists = false;
sis = new SegmentInfos();
}
segmentInfos = sis;
@ -2624,6 +2624,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
*
* <p>This requires this index not be among those to be added.
*
* <p>All added indexes must have been created by the same
* Lucene version as this index.
*
* @return The <a href="#sequence_number">sequence number</a>
* for this operation
*
@ -2663,6 +2666,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
infoStream.message("IW", "addIndexes: process directory " + dir);
}
SegmentInfos sis = SegmentInfos.readLatestCommit(dir); // read infos from dir
if (Objects.equals(segmentInfos.getIndexCreatedVersion(), sis.getIndexCreatedVersion()) == false) {
throw new IllegalArgumentException("Cannot use addIndexes(Directory) with indexes that have been created "
+ "by a different Lucene version. The current index was generated by "
+ segmentInfos.getIndexCreatedVersion()
+ " while one of the directories contains an index that was generated with "
+ sis.getIndexCreatedVersion());
}
totalMaxDoc += sis.totalMaxDoc();
commits.add(sis);
}
@ -4600,7 +4610,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// For infoStream output
synchronized SegmentInfos toLiveInfos(SegmentInfos sis) {
final SegmentInfos newSIS = new SegmentInfos();
final SegmentInfos newSIS = new SegmentInfos(sis.getIndexCreatedVersion());
final Map<SegmentCommitInfo,SegmentCommitInfo> liveSIS = new HashMap<>();
for(SegmentCommitInfo info : segmentInfos) {
liveSIS.put(info, info);

View File

@ -124,8 +124,10 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
/** Adds the {@link Version} that committed this segments_N file, as well as the {@link Version} of the oldest segment, since 5.3+ */
public static final int VERSION_53 = 6;
/** The format version that added the Lucene version in use at the time the index was created. */
public static final int VERSION_70 = 7;
static final int VERSION_CURRENT = VERSION_53;
static final int VERSION_CURRENT = VERSION_70;
/** Used to name new segments. */
// TODO: should this be a long ...?
@ -153,18 +155,22 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
/** Id for this commit; only written starting with Lucene 5.0 */
private byte[] id;
/** Which Lucene version wrote this commit, or null if this commit is pre-5.3. */
/** Which Lucene version wrote this commit. */
private Version luceneVersion;
/** Version of the oldest segment in the index, or null if there are no segments. */
private Version minSegmentLuceneVersion;
/** Sole constructor. Typically you call this and then
* use {@link #readLatestCommit(Directory) or
* #readCommit(Directory,String)} to populate each {@link
* SegmentCommitInfo}. Alternatively, you can add/remove your
* own {@link SegmentCommitInfo}s. */
public SegmentInfos() {
/** The Lucene version that was used to create the index. */
private final Version indexCreatedVersion;
/** Sole constructor.
* @param indexCreatedVersion the Lucene version at index creation time, or {@code null} if the index was created before 7.0 */
public SegmentInfos(Version indexCreatedVersion) {
if (indexCreatedVersion != null && indexCreatedVersion.onOrAfter(Version.LUCENE_7_0_0) == false) {
throw new IllegalArgumentException("indexCreatedVersion may only be non-null if the index was created on or after 7.0, got " + indexCreatedVersion);
}
this.indexCreatedVersion = indexCreatedVersion;
}
/** Returns {@link SegmentCommitInfo} at the provided
@ -302,18 +308,37 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
input.readBytes(id, 0, id.length);
CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
SegmentInfos infos = new SegmentInfos();
Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
if (luceneVersion.onOrAfter(Version.LUCENE_6_0_0) == false) {
// TODO: should we check indexCreatedVersion instead?
throw new IndexFormatTooOldException(input, "this index is too old (version: " + luceneVersion + ")");
}
Version indexCreatedVersion;
if (format >= VERSION_70) {
byte b = input.readByte();
switch (b) {
case 0:
// version is not known: pre-7.0 index that has been modified since the 7.0 upgrade
indexCreatedVersion = null;
break;
case 1:
// version is known: index has been created on or after 7.0
indexCreatedVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
break;
default:
throw new CorruptIndexException("Illegal byte value for a boolean: " + b + ", expected 0 or 1", input);
}
} else {
// pre-7.0 index that has not been modified since the 7.0 upgrade
indexCreatedVersion = null;
}
SegmentInfos infos = new SegmentInfos(indexCreatedVersion);
infos.id = id;
infos.generation = generation;
infos.lastGeneration = generation;
if (format >= VERSION_53) {
infos.luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
if (infos.luceneVersion.onOrAfter(Version.LUCENE_6_0_0) == false) {
throw new IndexFormatTooOldException(input, "this index is too old (version: " + infos.luceneVersion + ")");
}
} else {
throw new IndexFormatTooOldException(input, "this index segments file is too old (segment infos format: " + format + ")");
}
infos.luceneVersion = luceneVersion;
infos.version = input.readLong();
//System.out.println("READ sis version=" + infos.version);
@ -470,6 +495,17 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
out.writeVInt(Version.LATEST.bugfix);
//System.out.println(Thread.currentThread().getName() + ": now write " + out.getName() + " with version=" + version);
if (indexCreatedVersion != null) {
// 7.0+ index
out.writeByte((byte) 1);
out.writeVInt(indexCreatedVersion.major);
out.writeVInt(indexCreatedVersion.minor);
out.writeVInt(indexCreatedVersion.bugfix);
} else {
// pre-7.0 index
out.writeByte((byte) 0);
}
out.writeLong(version);
out.writeInt(counter); // write counter
out.writeInt(size());
@ -1001,4 +1037,11 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
public Version getMinSegmentLuceneVersion() {
return minSegmentLuceneVersion;
}
/** Return the version that was used to initially create the index. This
 * version is set when the index is first created and then never changes.
 * This returns {@code null} if the index was created before 7.0.
 * @return the index creation version held by this instance, possibly {@code null} */
public Version getIndexCreatedVersion() {
return indexCreatedVersion;
}
}

View File

@ -94,6 +94,7 @@ import org.apache.lucene.util.SetOnce;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@ -2799,5 +2800,13 @@ public class TestIndexWriter extends LuceneTestCase {
dir.close();
}
}
/**
 * Commits through a writer opened on a new directory and checks that the
 * resulting commit reports {@link Version#LATEST} as its creation version.
 */
public void testRecordsIndexCreatedVersion() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig());
  writer.commit();
  writer.close();
  SegmentInfos latestCommit = SegmentInfos.readLatestCommit(directory);
  Version createdVersion = latestCommit.getIndexCreatedVersion();
  assertEquals(Version.LATEST, createdVersion);
  directory.close();
}
}

View File

@ -29,9 +29,14 @@ import java.util.Collections;
public class TestSegmentInfos extends LuceneTestCase {
/** A non-null creation version older than 7.0 must be rejected by the constructor. */
public void testIllegalCreatedVersion() {
  final String expectedMessage =
      "indexCreatedVersion may only be non-null if the index was created on or after 7.0, got 6.5.0";
  IllegalArgumentException thrown = expectThrows(IllegalArgumentException.class, () -> {
    new SegmentInfos(Version.LUCENE_6_5_0);
  });
  assertEquals(expectedMessage, thrown.getMessage());
}
// LUCENE-5954
public void testVersionsNoSegments() throws IOException {
SegmentInfos sis = new SegmentInfos();
SegmentInfos sis = new SegmentInfos(Version.LATEST);
BaseDirectoryWrapper dir = newDirectory();
dir.setCheckIndexOnClose(false);
sis.commit(dir);
@ -48,7 +53,7 @@ public class TestSegmentInfos extends LuceneTestCase {
byte id[] = StringHelper.randomId();
Codec codec = Codec.getDefault();
SegmentInfos sis = new SegmentInfos();
SegmentInfos sis = new SegmentInfos(Version.LATEST);
SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_0", 1, false, Codec.getDefault(),
Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
info.setFiles(Collections.<String>emptySet());
@ -70,7 +75,7 @@ public class TestSegmentInfos extends LuceneTestCase {
byte id[] = StringHelper.randomId();
Codec codec = Codec.getDefault();
SegmentInfos sis = new SegmentInfos();
SegmentInfos sis = new SegmentInfos(Version.LATEST);
SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_6_0_0, "_0", 1, false, Codec.getDefault(),
Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
info.setFiles(Collections.<String>emptySet());

View File

@ -49,7 +49,7 @@ import org.apache.lucene.util.SuppressForbidden;
* careful!
*/
public class IndexSplitter {
public SegmentInfos infos;
public final SegmentInfos infos;
FSDirectory fsDir;
@ -133,7 +133,7 @@ public class IndexSplitter {
public void split(Path destDir, String[] segs) throws IOException {
Files.createDirectories(destDir);
FSDirectory destFSDir = FSDirectory.open(destDir);
SegmentInfos destInfos = new SegmentInfos();
SegmentInfos destInfos = new SegmentInfos(infos.getIndexCreatedVersion());
destInfos.counter = infos.counter;
for (String n : segs) {
SegmentCommitInfo infoPerCommit = getInfo(n);

View File

@ -50,6 +50,7 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/** Replica node, that pulls index changes from the primary node by copying newly flushed or merged index files.
*
@ -138,7 +139,7 @@ public abstract class ReplicaNode extends Node {
SegmentInfos infos;
if (segmentsFileName == null) {
// No index here yet:
infos = new SegmentInfos();
infos = new SegmentInfos(Version.LATEST);
message("top: init: no segments in index");
} else {
message("top: init: read existing segments commit " + segmentsFileName);