mirror of https://github.com/apache/lucene.git
LUCENE-5954: write oldest segment version, and segments_N version, in the segments file
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1684489 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1faa4105c1
commit
5c9d550eec
|
@ -61,6 +61,11 @@ New Features
|
|||
GeoPointInPolygonQuery for simple "indexed lat/lon point in
|
||||
bbox/shape" searching. (Nick Knize via Mike McCandless)
|
||||
|
||||
* LUCENE-5954: The segments_N commit point now stores the Lucene
|
||||
version that wrote the commit as well as the lucene version that
|
||||
wrote the oldest segment in the index, for faster checking of "too
|
||||
old" indices (Ryan Ernst, Robert Muir, Mike McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-6508: Simplify Lock api, there is now just
|
||||
|
|
|
@ -501,6 +501,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
fail("DirectoryReader.open should not pass for "+unsupportedNames[i]);
|
||||
} catch (IndexFormatTooOldException e) {
|
||||
// pass
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: got expected exc:");
|
||||
e.printStackTrace(System.out);
|
||||
}
|
||||
} finally {
|
||||
if (reader != null) reader.close();
|
||||
reader = null;
|
||||
|
@ -776,6 +780,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void changeIndexWithAdds(Random random, Directory dir, Version nameVersion) throws IOException {
|
||||
SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
|
||||
if (nameVersion.onOrAfter(Version.LUCENE_5_3_0)) {
|
||||
assertEquals(nameVersion, infos.getCommitLuceneVersion());
|
||||
}
|
||||
assertEquals(nameVersion, infos.getMinSegmentLuceneVersion());
|
||||
|
||||
// open writer
|
||||
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random))
|
||||
.setOpenMode(OpenMode.APPEND)
|
||||
|
|
|
@ -17,20 +17,6 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
|
@ -44,6 +30,21 @@ import org.apache.lucene.store.IOContext;
|
|||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* A collection of segmentInfo objects with methods for operating on those
|
||||
|
@ -60,13 +61,16 @@ import org.apache.lucene.util.StringHelper;
|
|||
* <p>
|
||||
* Files:
|
||||
* <ul>
|
||||
* <li><tt>segments_N</tt>: Header, Version, NameCounter, SegCount, <SegName,
|
||||
* <li><tt>segments_N</tt>: Header, LuceneVersion, Version, NameCounter, SegCount, MinSegmentLuceneVersion, <SegName,
|
||||
* HasSegID, SegID, SegCodec, DelGen, DeletionCount, FieldInfosGen, DocValuesGen,
|
||||
* UpdatesFiles><sup>SegCount</sup>, CommitUserData, Footer
|
||||
* </ul>
|
||||
* Data types:
|
||||
* <ul>
|
||||
* <li>Header --> {@link CodecUtil#writeIndexHeader IndexHeader}</li>
|
||||
* <li>LuceneVersion --> Which Lucene code {@link Version} was used for this commit, written as three {@link DataOutput#writeVInt vInt}: major, minor, bugfix
|
||||
* <li>MinSegmentLuceneVersion --> Lucene code {@link Version} of the oldest segment, written as three {@link DataOutput#writeVInt vInt}: major, minor, bugfix; this is only
|
||||
* written only if there's at least one segment
|
||||
* <li>NameCounter, SegCount, DeletionCount -->
|
||||
* {@link DataOutput#writeInt Int32}</li>
|
||||
* <li>Generation, Version, DelGen, Checksum, FieldInfosGen, DocValuesGen -->
|
||||
|
@ -119,7 +123,10 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
public static final int VERSION_50 = 4;
|
||||
/** The file format version for the segments_N codec header, since 5.1+ */
|
||||
public static final int VERSION_51 = 5; // use safe maps
|
||||
static final int VERSION_CURRENT = VERSION_51;
|
||||
/** Adds the {@link Version} that committed this segments_N file, as well as the {@link Version} of the oldest segment, since 5.3+ */
|
||||
public static final int VERSION_53 = 6;
|
||||
|
||||
static final int VERSION_CURRENT = VERSION_53;
|
||||
|
||||
/** Used to name new segments. */
|
||||
// TODO: should this be a long ...?
|
||||
|
@ -147,6 +154,12 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
/** Id for this commit; only written starting with Lucene 5.0 */
|
||||
private byte[] id;
|
||||
|
||||
/** Which Lucene version wrote this commit, or null if this commit is pre-5.3. */
|
||||
private Version luceneVersion;
|
||||
|
||||
/** Version of the oldest segment in the index, or null if there are no segments. */
|
||||
private Version minSegmentLuceneVersion;
|
||||
|
||||
/** Sole constructor. Typically you call this and then
|
||||
* use {@link #readLatestCommit(Directory) or
|
||||
* #readCommit(Directory,String)} to populate each {@link
|
||||
|
@ -282,12 +295,35 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
infos.id = id;
|
||||
infos.generation = generation;
|
||||
infos.lastGeneration = generation;
|
||||
if (format >= VERSION_53) {
|
||||
// TODO: in the future (7.0? sigh) we can use this to throw IndexFormatTooOldException ... or just rely on the
|
||||
// minSegmentLuceneVersion check instead:
|
||||
infos.luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
|
||||
} else {
|
||||
// else compute the min version down below in the for loop
|
||||
}
|
||||
|
||||
infos.version = input.readLong();
|
||||
infos.counter = input.readInt();
|
||||
int numSegments = input.readInt();
|
||||
if (numSegments < 0) {
|
||||
throw new CorruptIndexException("invalid segment count: " + numSegments, input);
|
||||
}
|
||||
|
||||
if (format >= VERSION_53) {
|
||||
if (numSegments > 0) {
|
||||
infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
|
||||
if (infos.minSegmentLuceneVersion.onOrAfter(Version.LUCENE_5_0_0) == false) {
|
||||
throw new IndexFormatTooOldException(input, "this index contains a too-old segment (version: " + infos.minSegmentLuceneVersion + ")");
|
||||
}
|
||||
} else {
|
||||
// else leave as null: no segments
|
||||
}
|
||||
} else {
|
||||
// else we recompute it below as we visit segments; it can't be used for throwing IndexFormatTooOldExc, but consumers of
|
||||
// SegmentInfos can maybe still use it for other reasons
|
||||
}
|
||||
|
||||
long totalDocs = 0;
|
||||
for (int seg = 0; seg < numSegments; seg++) {
|
||||
String segName = input.readString();
|
||||
|
@ -301,7 +337,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
} else {
|
||||
throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
|
||||
}
|
||||
Codec codec = readCodec(input);
|
||||
Codec codec = readCodec(input, format < VERSION_53);
|
||||
SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
|
||||
info.setCodec(codec);
|
||||
totalDocs += info.maxDoc();
|
||||
|
@ -335,7 +371,17 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
}
|
||||
siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
|
||||
infos.add(siPerCommit);
|
||||
|
||||
Version segmentVersion = info.getVersion();
|
||||
if (format < VERSION_53) {
|
||||
if (infos.minSegmentLuceneVersion == null || segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
|
||||
infos.minSegmentLuceneVersion = segmentVersion;
|
||||
}
|
||||
} else if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
|
||||
throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input);
|
||||
}
|
||||
}
|
||||
|
||||
if (format >= VERSION_51) {
|
||||
infos.userData = input.readMapOfStrings();
|
||||
} else {
|
||||
|
@ -357,13 +403,15 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45", "Lucene46", "Lucene49", "Lucene410"
|
||||
);
|
||||
|
||||
private static Codec readCodec(DataInput input) throws IOException {
|
||||
private static Codec readCodec(DataInput input, boolean unsupportedAllowed) throws IOException {
|
||||
final String name = input.readString();
|
||||
try {
|
||||
return Codec.forName(name);
|
||||
} catch (IllegalArgumentException e) {
|
||||
// give better error messages if we can, first check if this is a legacy codec
|
||||
if (unsupportedCodecs.contains(name)) {
|
||||
// We should only get here on pre-5.3 indices, but we can't test this until 7.0 when 5.x indices become too old:
|
||||
assert unsupportedAllowed;
|
||||
IOException newExc = new IndexFormatTooOldException(input, "Codec '" + name + "' is too old");
|
||||
newExc.initCause(e);
|
||||
throw newExc;
|
||||
|
@ -407,10 +455,34 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
try {
|
||||
segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
|
||||
CodecUtil.writeIndexHeader(segnOutput, "segments", VERSION_CURRENT,
|
||||
StringHelper.randomId(), Long.toString(nextGeneration, Character.MAX_RADIX));
|
||||
StringHelper.randomId(), Long.toString(nextGeneration, Character.MAX_RADIX));
|
||||
segnOutput.writeVInt(Version.LATEST.major);
|
||||
segnOutput.writeVInt(Version.LATEST.minor);
|
||||
segnOutput.writeVInt(Version.LATEST.bugfix);
|
||||
|
||||
segnOutput.writeLong(version);
|
||||
segnOutput.writeInt(counter); // write counter
|
||||
segnOutput.writeInt(size()); // write infos
|
||||
segnOutput.writeInt(size());
|
||||
|
||||
if (size() > 0) {
|
||||
|
||||
Version minSegmentVersion = null;
|
||||
|
||||
// We do a separate loop up front so we can write the minSegmentVersion before
|
||||
// any SegmentInfo; this makes it cleaner to throw IndexFormatTooOldExc at read time:
|
||||
for (SegmentCommitInfo siPerCommit : this) {
|
||||
Version segmentVersion = siPerCommit.info.getVersion();
|
||||
if (minSegmentVersion == null || segmentVersion.onOrAfter(minSegmentVersion) == false) {
|
||||
minSegmentVersion = segmentVersion;
|
||||
}
|
||||
}
|
||||
|
||||
segnOutput.writeVInt(minSegmentVersion.major);
|
||||
segnOutput.writeVInt(minSegmentVersion.minor);
|
||||
segnOutput.writeVInt(minSegmentVersion.bugfix);
|
||||
}
|
||||
|
||||
// write infos
|
||||
for (SegmentCommitInfo siPerCommit : this) {
|
||||
SegmentInfo si = siPerCommit.info;
|
||||
segnOutput.writeString(si.name);
|
||||
|
@ -898,4 +970,15 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
int indexOf(SegmentCommitInfo si) {
|
||||
return segments.indexOf(si);
|
||||
}
|
||||
|
||||
/** Returns which Lucene {@link Version} wrote this commit, or null if the
|
||||
* version this index was written with did not directly record the version. */
|
||||
public Version getCommitLuceneVersion() {
|
||||
return luceneVersion;
|
||||
}
|
||||
|
||||
/** Returns the version of the oldest segment, or null if there are no segments. */
|
||||
public Version getMinSegmentLuceneVersion() {
|
||||
return minSegmentLuceneVersion;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,14 +17,6 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -37,6 +29,15 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/*
|
||||
Verify we can read the pre-2.1 file format, do searches
|
||||
|
@ -286,6 +287,8 @@ public class TestDeletionPolicy extends LuceneTestCase {
|
|||
// 1 second resolution, allow +1 second in commit
|
||||
// age tolerance:
|
||||
SegmentInfos sis = SegmentInfos.readCommit(dir, fileName);
|
||||
assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
|
||||
assertEquals(Version.LATEST, sis.getMinSegmentLuceneVersion());
|
||||
long modTime = Long.parseLong(sis.getUserData().get("commitTime"));
|
||||
oneSecondResolution &= (modTime % 1000) == 0;
|
||||
final long leeway = (long) ((SECONDS + (oneSecondResolution ? 1.0:0.0))*1000);
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
|
||||
public class TestSegmentInfos extends LuceneTestCase {
|
||||
|
||||
// LUCENE-5954
|
||||
public void testVersionsNoSegments() throws IOException {
|
||||
SegmentInfos sis = new SegmentInfos();
|
||||
BaseDirectoryWrapper dir = newDirectory();
|
||||
dir.setCheckIndexOnClose(false);
|
||||
sis.commit(dir);
|
||||
sis = SegmentInfos.readLatestCommit(dir);
|
||||
assertNull(sis.getMinSegmentLuceneVersion());
|
||||
assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// LUCENE-5954
|
||||
public void testVersionsOneSegment() throws IOException {
|
||||
BaseDirectoryWrapper dir = newDirectory();
|
||||
dir.setCheckIndexOnClose(false);
|
||||
byte id[] = StringHelper.randomId();
|
||||
Codec codec = Codec.getDefault();
|
||||
|
||||
SegmentInfos sis = new SegmentInfos();
|
||||
SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_5_0_0, "_0", 1, false, Codec.getDefault(),
|
||||
Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap());
|
||||
info.setFiles(Collections.<String>emptySet());
|
||||
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
|
||||
SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
|
||||
|
||||
sis.add(commitInfo);
|
||||
sis.commit(dir);
|
||||
sis = SegmentInfos.readLatestCommit(dir);
|
||||
assertEquals(Version.LUCENE_5_0_0, sis.getMinSegmentLuceneVersion());
|
||||
assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// LUCENE-5954
|
||||
public void testVersionsTwoSegments() throws IOException {
|
||||
BaseDirectoryWrapper dir = newDirectory();
|
||||
dir.setCheckIndexOnClose(false);
|
||||
byte id[] = StringHelper.randomId();
|
||||
Codec codec = Codec.getDefault();
|
||||
|
||||
SegmentInfos sis = new SegmentInfos();
|
||||
SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_5_0_0, "_0", 1, false, Codec.getDefault(),
|
||||
Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap());
|
||||
info.setFiles(Collections.<String>emptySet());
|
||||
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
|
||||
SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
|
||||
sis.add(commitInfo);
|
||||
|
||||
info = new SegmentInfo(dir, Version.LUCENE_5_1_0, "_1", 1, false, Codec.getDefault(),
|
||||
Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap());
|
||||
info.setFiles(Collections.<String>emptySet());
|
||||
codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
|
||||
commitInfo = new SegmentCommitInfo(info, 0, -1, -1, -1);
|
||||
sis.add(commitInfo);
|
||||
|
||||
sis.commit(dir);
|
||||
sis = SegmentInfos.readLatestCommit(dir);
|
||||
assertEquals(Version.LUCENE_5_0_0, sis.getMinSegmentLuceneVersion());
|
||||
assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue