LUCENE-7999: upgrade int to long for tracking the counter for the next segment name to prevent overflow

This commit is contained in:
Mike McCandless 2017-10-24 13:13:35 -04:00
parent 8993f1b827
commit ea36f5040c
6 changed files with 23 additions and 12 deletions

View File

@ -21,6 +21,11 @@ Bug Fixes
a previous boosted field's factor to subsequent unboosted fields.
(Christine Poerschke)
* LUCENE-7999: Switch from int to long to track the name for the next
segment to write, so that very long-lived indices with very frequent
refreshes or commits, and high indexing thread counts, do not
overflow an int (Mykhailo Demianenko via Mike McCandless)
======================= Lucene 7.1.0 =======================
Changes in Runtime Behavior

View File

@ -149,7 +149,7 @@ public final class CheckIndex implements Closeable {
public boolean partial;
/** The greatest segment name. */
public int maxSegmentName;
public long maxSegmentName;
/** Whether the SegmentInfos.counter is greater than any of the segments' names. */
public boolean validCounter;
@ -635,7 +635,7 @@ public final class CheckIndex implements Closeable {
for(int i=0;i<numSegments;i++) {
final SegmentCommitInfo info = sis.info(i);
int segmentName = Integer.parseInt(info.info.name.substring(1), Character.MAX_RADIX);
long segmentName = Long.parseLong(info.info.name.substring(1), Character.MAX_RADIX);
if (segmentName > result.maxSegmentName) {
result.maxSegmentName = segmentName;
}

View File

@ -255,7 +255,7 @@ final class IndexFileDeleter implements Closeable {
static void inflateGens(SegmentInfos infos, Collection<String> files, InfoStream infoStream) {
long maxSegmentGen = Long.MIN_VALUE;
int maxSegmentName = Integer.MIN_VALUE;
long maxSegmentName = Long.MIN_VALUE;
// Confusingly, this is the union of liveDocs, field infos, doc values
// (and maybe others, in the future) gens. This is somewhat messy,
@ -288,7 +288,7 @@ final class IndexFileDeleter implements Closeable {
continue;
}
maxSegmentName = Math.max(maxSegmentName, Integer.parseInt(segmentName.substring(1), Character.MAX_RADIX));
maxSegmentName = Math.max(maxSegmentName, Long.parseLong(segmentName.substring(1), Character.MAX_RADIX));
Long curGen = maxPerSegmentGen.get(segmentName);
if (curGen == null) {

View File

@ -1946,7 +1946,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// problems at least with ConcurrentMergeScheduler.
changeCount.incrementAndGet();
segmentInfos.changed();
return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
return "_" + Long.toString(segmentInfos.counter++, Character.MAX_RADIX);
}
}

View File

@ -120,12 +120,13 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
/** The version that added information about the Lucene version at the time the index was created. */
public static final int VERSION_70 = 7;
/** The version that changed the segment name counter from int to long. */
public static final int VERSION_72 = 8;
static final int VERSION_CURRENT = VERSION_70;
static final int VERSION_CURRENT = VERSION_72;
/** Used to name new segments. */
// TODO: should this be a long ...?
public int counter;
public long counter;
/** Counts how often the index has been changed. */
public long version;
@ -326,7 +327,11 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
infos.version = input.readLong();
//System.out.println("READ sis version=" + infos.version);
infos.counter = input.readInt();
if (format > VERSION_70) {
infos.counter = input.readVLong();
} else {
infos.counter = input.readInt();
}
int numSegments = input.readInt();
if (numSegments < 0) {
throw new CorruptIndexException("invalid segment count: " + numSegments, input);
@ -469,8 +474,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
out.writeVInt(indexCreatedVersionMajor);
out.writeLong(version);
out.writeInt(counter); // write counter
out.writeLong(version);
out.writeVLong(counter); // write counter
out.writeInt(size());
if (size() > 0) {

View File

@ -428,7 +428,8 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
for (int i = 0; i < 10; i++) {
Directory dir = newDirectory();
Version version = versions[random().nextInt(versions.length)];
String name = "_" + Integer.toString(random().nextInt(Integer.MAX_VALUE), Character.MAX_RADIX);
long randomSegmentIndex = Math.abs(random().nextLong());
String name = "_" + Long.toString(randomSegmentIndex != Long.MIN_VALUE ? randomSegmentIndex : random().nextInt(Integer.MAX_VALUE), Character.MAX_RADIX);
int docCount = TestUtil.nextInt(random(), 1, IndexWriter.MAX_DOCS);
boolean isCompoundFile = random().nextBoolean();
Set<String> files = new HashSet<>();