LUCENE-5954: write oldest segment version, and segments_N version, in the segments file

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1684489 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-06-09 17:55:07 +00:00
parent 1faa4105c1
commit 5c9d550eec
5 changed files with 225 additions and 28 deletions

View File

@ -61,6 +61,11 @@ New Features
GeoPointInPolygonQuery for simple "indexed lat/lon point in
bbox/shape" searching. (Nick Knize via Mike McCandless)
* LUCENE-5954: The segments_N commit point now stores the Lucene
version that wrote the commit as well as the lucene version that
wrote the oldest segment in the index, for faster checking of "too
old" indices (Ryan Ernst, Robert Muir, Mike McCandless)
API Changes
* LUCENE-6508: Simplify Lock api, there is now just

View File

@ -501,6 +501,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
fail("DirectoryReader.open should not pass for "+unsupportedNames[i]);
} catch (IndexFormatTooOldException e) {
// pass
if (VERBOSE) {
System.out.println("TEST: got expected exc:");
e.printStackTrace(System.out);
}
} finally {
if (reader != null) reader.close();
reader = null;
@ -776,6 +780,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
public void changeIndexWithAdds(Random random, Directory dir, Version nameVersion) throws IOException {
SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
if (nameVersion.onOrAfter(Version.LUCENE_5_3_0)) {
assertEquals(nameVersion, infos.getCommitLuceneVersion());
}
assertEquals(nameVersion, infos.getMinSegmentLuceneVersion());
// open writer
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random))
.setOpenMode(OpenMode.APPEND)

View File

@ -17,20 +17,6 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesFormat;
@ -44,6 +30,21 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Map;
import java.util.Set;
/**
* A collection of segmentInfo objects with methods for operating on those
@ -60,13 +61,16 @@ import org.apache.lucene.util.StringHelper;
* <p>
* Files:
* <ul>
* <li><tt>segments_N</tt>: Header, Version, NameCounter, SegCount, &lt;SegName,
* <li><tt>segments_N</tt>: Header, LuceneVersion, Version, NameCounter, SegCount, MinSegmentLuceneVersion, &lt;SegName,
* HasSegID, SegID, SegCodec, DelGen, DeletionCount, FieldInfosGen, DocValuesGen,
* UpdatesFiles&gt;<sup>SegCount</sup>, CommitUserData, Footer
* </ul>
* Data types:
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}</li>
* <li>LuceneVersion --&gt; Which Lucene code {@link Version} was used for this commit, written as three {@link DataOutput#writeVInt vInt}: major, minor, bugfix
* <li>MinSegmentLuceneVersion --&gt; Lucene code {@link Version} of the oldest segment, written as three {@link DataOutput#writeVInt vInt}: major, minor, bugfix; this is only
* written only if there's at least one segment
* <li>NameCounter, SegCount, DeletionCount --&gt;
* {@link DataOutput#writeInt Int32}</li>
* <li>Generation, Version, DelGen, Checksum, FieldInfosGen, DocValuesGen --&gt;
@ -119,7 +123,10 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
public static final int VERSION_50 = 4;
/** The file format version for the segments_N codec header, since 5.1+ */
public static final int VERSION_51 = 5; // use safe maps
static final int VERSION_CURRENT = VERSION_51;
/** Adds the {@link Version} that committed this segments_N file, as well as the {@link Version} of the oldest segment, since 5.3+ */
public static final int VERSION_53 = 6;
static final int VERSION_CURRENT = VERSION_53;
/** Used to name new segments. */
// TODO: should this be a long ...?
@ -147,6 +154,12 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
/** Id for this commit; only written starting with Lucene 5.0 */
private byte[] id;
/** Which Lucene version wrote this commit, or null if this commit is pre-5.3. */
private Version luceneVersion;
/** Version of the oldest segment in the index, or null if there are no segments. */
private Version minSegmentLuceneVersion;
/** Sole constructor. Typically you call this and then
* use {@link #readLatestCommit(Directory) or
* #readCommit(Directory,String)} to populate each {@link
@ -282,12 +295,35 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
infos.id = id;
infos.generation = generation;
infos.lastGeneration = generation;
if (format >= VERSION_53) {
// TODO: in the future (7.0? sigh) we can use this to throw IndexFormatTooOldException ... or just rely on the
// minSegmentLuceneVersion check instead:
infos.luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
} else {
// else compute the min version down below in the for loop
}
infos.version = input.readLong();
infos.counter = input.readInt();
int numSegments = input.readInt();
if (numSegments < 0) {
throw new CorruptIndexException("invalid segment count: " + numSegments, input);
}
if (format >= VERSION_53) {
if (numSegments > 0) {
infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_5_0_0) == false) {
throw new IndexFormatTooOldException(input, "this index contains a too-old segment (version: " + infos.minSegmentLuceneVersion + ")");
}
} else {
// else leave as null: no segments
}
} else {
// else we recompute it below as we visit segments; it can't be used for throwing IndexFormatTooOldExc, but consumers of
// SegmentInfos can maybe still use it for other reasons
}
long totalDocs = 0;
for (int seg = 0; seg < numSegments; seg++) {
String segName = input.readString();
@ -301,7 +337,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
} else {
throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
}
Codec codec = readCodec(input);
Codec codec = readCodec(input, format < VERSION_53);
SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
info.setCodec(codec);
totalDocs += info.maxDoc();
@ -335,7 +371,17 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
}
siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
infos.add(siPerCommit);
Version segmentVersion = info.getVersion();
if (format < VERSION_53) {
if (infos.minSegmentLuceneVersion == null || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
infos.minSegmentLuceneVersion = segmentVersion;
}
} else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input);
}
}
if (format >= VERSION_51) {
infos.userData = input.readMapOfStrings();
} else {
@ -357,13 +403,15 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45", "Lucene46", "Lucene49", "Lucene410"
);
private static Codec readCodec(DataInput input) throws IOException {
private static Codec readCodec(DataInput input, boolean unsupportedAllowed) throws IOException {
final String name = input.readString();
try {
return Codec.forName(name);
} catch (IllegalArgumentException e) {
// give better error messages if we can, first check if this is a legacy codec
if (unsupportedCodecs.contains(name)) {
// We should only get here on pre-5.3 indices, but we can't test this until 7.0 when 5.x indices become too old:
assert unsupportedAllowed;
IOException newExc = new IndexFormatTooOldException(input, "Codec '" + name + "' is too old");
newExc.initCause(e);
throw newExc;
@ -407,10 +455,34 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
try {
segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
CodecUtil.writeIndexHeader(segnOutput, "segments", VERSION_CURRENT,
StringHelper.randomId(), Long.toString(nextGeneration, Character.MAX_RADIX));
StringHelper.randomId(), Long.toString(nextGeneration, Character.MAX_RADIX));
segnOutput.writeVInt(Version.LATEST.major);
segnOutput.writeVInt(Version.LATEST.minor);
segnOutput.writeVInt(Version.LATEST.bugfix);
segnOutput.writeLong(version);
segnOutput.writeInt(counter); // write counter
segnOutput.writeInt(size()); // write infos
segnOutput.writeInt(size());
if (size() > 0) {
Version minSegmentVersion = null;
// We do a separate loop up front so we can write the minSegmentVersion before
// any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
for (SegmentCommitInfo siPerCommit : this) {
Version segmentVersion = siPerCommit.info.getVersion();
if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
minSegmentVersion = segmentVersion;
}
}
segnOutput.writeVInt(minSegmentVersion.major);
segnOutput.writeVInt(minSegmentVersion.minor);
segnOutput.writeVInt(minSegmentVersion.bugfix);
}
// write infos
for (SegmentCommitInfo siPerCommit : this) {
SegmentInfo si = siPerCommit.info;
segnOutput.writeString(si.name);
@ -898,4 +970,15 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
int indexOf(SegmentCommitInfo si) {
return segments.indexOf(si);
}
/** Returns which Lucene {@link Version} wrote this commit, or null if the
* version this index was written with did not directly record the version. */
public Version getCommitLuceneVersion() {
return luceneVersion;
}
/** Returns the version of the oldest segment, or null if there are no segments. */
public Version getMinSegmentLuceneVersion() {
return minSegmentLuceneVersion;
}
}

View File

@ -17,14 +17,6 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -37,6 +29,15 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/*
Verify we can read the pre-2.1 file format, do searches
@ -286,6 +287,8 @@ public class TestDeletionPolicy extends LuceneTestCase {
// 1 second resolution, allow +1 second in commit
// age tolerance:
SegmentInfos sis = SegmentInfos.readCommit(dir, fileName);
assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
assertEquals(Version.LATEST, sis.getMinSegmentLuceneVersion());
long modTime = Long.parseLong(sis.getUserData().get("commitTime"));
oneSecondResolution &= (modTime % 1000) == 0;
final long leeway = (long) ((SECONDS + (oneSecondResolution ? 1.0:0.0))*1000);

View File

@ -0,0 +1,96 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.util.Collections;
public class TestSegmentInfos extends LuceneTestCase {
// LUCENE-5954
public void testVersionsNoSegments() throws IOException {
SegmentInfos sis = new SegmentInfos();
BaseDirectoryWrapper dir = newDirectory();
dir.setCheckIndexOnClose(false);
sis.commit(dir);
sis = SegmentInfos.readLatestCommit(dir);
assertNull(sis.getMinSegmentLuceneVersion());
assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
dir.close();
}
// LUCENE-5954
public void testVersionsOneSegment() throws IOException {
BaseDirectoryWrapper dir = newDirectory();
dir.setCheckIndexOnClose(false);
byte id[] = StringHelper.randomId();
Codec codec = Codec.getDefault();
SegmentInfos sis = new SegmentInfos();
SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_5_0_0, "_0", 1, false, Codec.getDefault(),
Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap());
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
sis.add(commitInfo);
sis.commit(dir);
sis = SegmentInfos.readLatestCommit(dir);
assertEquals(Version.LUCENE_5_0_0, sis.getMinSegmentLuceneVersion());
assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
dir.close();
}
// LUCENE-5954
public void testVersionsTwoSegments() throws IOException {
BaseDirectoryWrapper dir = newDirectory();
dir.setCheckIndexOnClose(false);
byte id[] = StringHelper.randomId();
Codec codec = Codec.getDefault();
SegmentInfos sis = new SegmentInfos();
SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_5_0_0, "_0", 1, false, Codec.getDefault(),
Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap());
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
sis.add(commitInfo);
info = new SegmentInfo(dir, Version.LUCENE_5_1_0, "_1", 1, false, Codec.getDefault(),
Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap());
info.setFiles(Collections.<String>emptySet());
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
sis.add(commitInfo);
sis.commit(dir);
sis = SegmentInfos.readLatestCommit(dir);
assertEquals(Version.LUCENE_5_0_0, sis.getMinSegmentLuceneVersion());
assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
dir.close();
}
}