From 1d5d73eefc6a458bb8bc281df35ef9d423bdd4df Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Thu, 21 Aug 2014 22:58:30 +0000
Subject: [PATCH] LUCENE-5895: add id for each segment and commit to aid
replication
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1619620 13f79535-47bb-0310-9956-ffa450edef68
---
lucene/CHANGES.txt | 5 ++
.../SimpleTextSegmentInfoReader.java | 27 +++---
.../SimpleTextSegmentInfoWriter.java | 5 ++
.../lucene46/Lucene46SegmentInfoFormat.java | 6 +-
.../lucene46/Lucene46SegmentInfoReader.java | 9 +-
.../lucene46/Lucene46SegmentInfoWriter.java | 1 +
.../org/apache/lucene/index/CheckIndex.java | 3 +-
.../index/DocumentsWriterPerThread.java | 3 +-
.../org/apache/lucene/index/IndexWriter.java | 7 +-
.../org/apache/lucene/index/SegmentInfo.java | 24 ++++-
.../org/apache/lucene/index/SegmentInfos.java | 23 ++++-
.../org/apache/lucene/util/StringHelper.java | 89 ++++++++++++++++++-
.../apache/lucene/index/TestIndexWriter.java | 54 ++++++++++-
.../apache/lucene/util/LuceneTestCase.java | 9 ++
14 files changed, 237 insertions(+), 28 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 67b9d224a93..2cfdc7597d3 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -98,6 +98,11 @@ Other
======================= Lucene 4.11.0 ======================
(No Changes)
+New Features
+
+* LUCENE-5895: Lucene now stores a unique id per-segment and per-commit to aid
+ in accurate replication of index files (Robert Muir, Mike McCandless)
+
======================= Lucene 4.10.0 ======================
New Features
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
index 1a416d9ed14..cfeffed4513 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
@@ -17,15 +17,6 @@ package org.apache.lucene.codecs.simpletext;
* limitations under the License.
*/
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
-import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
-
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
@@ -44,6 +35,16 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_ID;
+import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
+
/**
* reads plaintext segments files
*
@@ -99,10 +100,14 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
files.add(fileName);
}
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_ID);
+ final String id = readString(SI_ID.length, scratch);
+
SimpleTextUtil.checkFooter(input);
- SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
- isCompoundFile, null, diagnostics);
+ SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
+ isCompoundFile, null, diagnostics, id);
info.setFiles(files);
success = true;
return info;
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
index 38f15fa1ed0..3b17c057169 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
@@ -48,6 +48,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
+ final static BytesRef SI_ID = new BytesRef(" id ");
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
@@ -104,6 +105,10 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
SimpleTextUtil.writeNewline(output);
}
}
+
+ SimpleTextUtil.write(output, SI_ID);
+ SimpleTextUtil.write(output, si.getId(), scratch);
+ SimpleTextUtil.writeNewline(output);
SimpleTextUtil.writeChecksum(output, scratch);
success = true;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
index 090b216523c..63f33f10564 100755
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoFormat.java
@@ -31,7 +31,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
*
* Files:
*
- * - .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
+ * - .si: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Id, Footer
*
*
* Data types:
@@ -44,6 +44,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
* Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}
* IsCompoundFile --> {@link DataOutput#writeByte Int8}
* Footer --> {@link CodecUtil#writeFooter CodecFooter}
+ * Id --> {@link DataOutput#writeString String}
*
*
* Field Descriptions:
@@ -88,5 +89,6 @@ public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
static final String CODEC_NAME = "Lucene46SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CHECKSUM = 1;
- static final int VERSION_CURRENT = VERSION_CHECKSUM;
+ static final int VERSION_ID = 2;
+ static final int VERSION_CURRENT = VERSION_ID;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
index 5e9e6113186..66b3c6b87fc 100755
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoReader.java
@@ -62,13 +62,20 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader {
final Map diagnostics = input.readStringStringMap();
final Set files = input.readStringSet();
+ String id;
+ if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_ID) {
+ id = input.readString();
+ } else {
+ id = null;
+ }
+
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
CodecUtil.checkFooter(input);
} else {
CodecUtil.checkEOF(input);
}
- final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);
+ final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
si.setFiles(files);
success = true;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
index 7e51e19c084..b56feb51007 100755
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene46/Lucene46SegmentInfoWriter.java
@@ -59,6 +59,7 @@ public class Lucene46SegmentInfoWriter extends SegmentInfoWriter {
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
output.writeStringSet(si.files());
+ output.writeString(si.getId());
CodecUtil.writeFooter(output);
success = true;
} finally {
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index f41a9f715d2..dccdd0132b1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -484,7 +484,7 @@ public class CheckIndex {
}
msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments
- + " " + versionString + " format=" + sFormat + userDataString);
+ + " " + versionString + " id=" + sis.getId() + " format=" + sFormat + userDataString);
if (onlySegments != null) {
result.partial = true;
@@ -535,6 +535,7 @@ public class CheckIndex {
try {
msg(infoStream, " version=" + (version == null ? "3.0" : version));
+ msg(infoStream, " id=" + info.info.getId());
final Codec codec = info.info.getCodec();
msg(infoStream, " codec=" + codec);
segInfoStat.codec = codec;
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
index 030728503dd..5bd8db130ba 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
@@ -39,6 +39,7 @@ import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.MutableBits;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
@@ -178,7 +179,7 @@ class DocumentsWriterPerThread {
pendingUpdates.clear();
deleteSlice = deleteQueue.newSlice();
- segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, null);
+ segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, null, StringHelper.randomId());
assert numDocsInRAM == 0;
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue);
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index d454b2e5ac1..cf5d22778dc 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -63,6 +63,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.Version;
@@ -2566,7 +2567,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
SegmentInfo info = new SegmentInfo(directory, Version.LATEST, mergedName, -1,
- false, codec, null);
+ false, codec, null, StringHelper.randomId());
SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
MergeState.CheckAbort.NONE, globalFieldNumberMap,
@@ -2667,7 +2668,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// Same SI as before but we change directory and name
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
info.info.getUseCompoundFile(), info.info.getCodec(),
- info.info.getDiagnostics());
+ info.info.getDiagnostics(), StringHelper.randomId());
SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo,
info.getDelCount(), info.getDelGen(), info.getFieldInfosGen(),
info.getDocValuesGen());
@@ -3789,7 +3790,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// ConcurrentMergePolicy we keep deterministic segment
// names.
final String mergeSegmentName = newSegmentName();
- SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false, codec, null);
+ SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false, codec, null, StringHelper.randomId());
Map details = new HashMap<>();
details.put("mergeMaxNumSegments", "" + merge.maxNumSegments);
details.put("mergeFactor", Integer.toString(merge.segments.size()));
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
index d4ac8ca0185..d676e85fd4c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
@@ -27,7 +27,6 @@ import java.util.regex.Matcher;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.TrackingDirectoryWrapper;
-import org.apache.lucene.util.Constants;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
@@ -58,10 +57,13 @@ public final class SegmentInfo {
private boolean isCompoundFile;
+ /** Id that uniquely identifies this segment. */
+ private final String id;
+
private Codec codec;
private Map diagnostics;
-
+
// Tracks the Lucene version this segment was created with, since 3.1. Null
// indicates an older than 3.0 index, and it's used to detect a too old index.
// The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
@@ -79,13 +81,23 @@ public final class SegmentInfo {
return diagnostics;
}
+ /**
+ * Construct a new complete SegmentInfo instance from
+ * input, with a null id (no id is generated here).
+ */
+ public SegmentInfo(Directory dir, Version version, String name, int docCount,
+ boolean isCompoundFile, Codec codec, Map diagnostics) {
+ this(dir, version, name, docCount, isCompoundFile, codec, diagnostics, null);
+ }
+
/**
* Construct a new complete SegmentInfo instance from input.
* Note: this is public only to allow access from
* the codecs package.
*/
public SegmentInfo(Directory dir, Version version, String name, int docCount,
- boolean isCompoundFile, Codec codec, Map diagnostics) {
+ boolean isCompoundFile, Codec codec, Map diagnostics,
+ String id) {
assert !(dir instanceof TrackingDirectoryWrapper);
this.dir = dir;
this.version = version;
@@ -94,6 +106,7 @@ public final class SegmentInfo {
this.isCompoundFile = isCompoundFile;
this.codec = codec;
this.diagnostics = diagnostics;
+ this.id = id;
}
/**
@@ -212,6 +225,11 @@ public final class SegmentInfo {
return version;
}
+ /** Return the id that uniquely identifies this segment, or null if none was supplied at construction. */
+ public String getId() {
+ return id;
+ }
+
private Set setFiles;
/** Sets the files written for this segment. */
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
index 38c64d2504c..55e2776d751 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
@@ -27,8 +27,8 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
-import java.util.Map;
import java.util.Map.Entry;
+import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.Codec;
@@ -43,6 +43,7 @@ import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NoSuchDirectoryException;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.StringHelper;
/**
* A collection of segmentInfo objects with methods for operating on those
@@ -137,6 +138,9 @@ public final class SegmentInfos implements Cloneable, Iterable= VERSION_410) {
+ id = input.readString();
+ }
if (format >= VERSION_48) {
CodecUtil.checkFooter(input);
@@ -470,7 +486,7 @@ public final class SegmentInfos implements Cloneable, Iterable 8) {
+ prop = prop.substring(prop.length()-8);
+ }
+ x0 = Long.parseLong(prop, 16);
+ x1 = x0;
+ } else {
+ // "Ghetto randomness" from 3 different sources:
+ x0 = System.nanoTime();
+ x1 = ((long) StringHelper.class.hashCode()) << 32; // widen first: shifting an int by 32 is a no-op in Java
+ StringBuilder sb = new StringBuilder();
+ // Properties can vary across JVM instances:
+ Properties p = System.getProperties();
+ for (String s: p.stringPropertyNames()) {
+ sb.append(s);
+ sb.append(p.getProperty(s));
+ }
+ x1 |= sb.toString().hashCode() & 0xFFFFFFFFL; // mask so a negative hash cannot sign-extend over the high 32 bits
+ // TODO: maybe read from /dev/urandom when it's available?
+ }
+
+ // Use a few iterations of xorshift128 to scatter the seed
+ // in case multiple Lucene instances start up "near" the same
+ // nanoTime, since we use ++ (mod 2^128) for full period cycle:
+ for(int i=0;i<10;i++) {
+ long s1 = x0;
+ long s0 = x1;
+ x0 = s0;
+ s1 ^= s1 << 23; // a
+ x1 = s1 ^ s0 ^ (s1 >>> 17) ^ (s0 >>> 26); // b, c
+ }
+
+ // Concatenate bits of x0 and x1, as unsigned 128 bit integer. Each half is
+ // masked to 64 bits first: BigInteger.valueOf sign-extends, so a negative x1
+ // would otherwise OR ones over every bit contributed by x0:
+ BigInteger mask64 = BigInteger.ONE.shiftLeft(64).subtract(BigInteger.ONE);
+ nextId = BigInteger.valueOf(x0).and(mask64).shiftLeft(64).or(BigInteger.valueOf(x1).and(mask64));
+ }
+
+ /**
+ * Generates a non-cryptographic globally unique id: the current value of the
+ * shared counter rendered in hex, after which the counter is advanced by one.
+ */
+ public static String randomId() {
+
+ // NOTE: Java's UUID.randomUUID() is deliberately not used here because:
+ //
+ // * It's overkill for our usage: it tries to be cryptographically
+ // secure, whereas for this use we don't care if someone can
+ // guess the IDs.
+ //
+ // * It uses SecureRandom, which on Linux can easily take a long time
+ // (I saw ~ 10 seconds just running a Lucene test) when entropy
+ // harvesting is falling behind.
+ //
+ // * It loses a few (6) bits to version and variant and it's not clear
+ // what impact that has on the period, whereas the simple ++ (mod 2^128)
+ // we use here is guaranteed to have the full period.
+
+ final String hex;
+ synchronized(idLock) {
+ // Read and advance under the lock so no two callers observe the same value:
+ hex = nextId.toString(16);
+ nextId = nextId.add(BigInteger.ONE).and(idMask);
+ }
+
+ assert hex.length() <= 32: "id=" + hex;
+
+ // Left-pad with the tail of idPad (presumably 32 pad chars, matching the assert above):
+ return idPad.substring(hex.length()) + hex;
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
index 5d32744a048..b840585dc4a 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -81,11 +81,12 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.SetOnce;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.automaton.Automata;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.junit.Test;
public class TestIndexWriter extends LuceneTestCase {
@@ -2819,4 +2820,55 @@ public class TestIndexWriter extends LuceneTestCase {
iw.close();
dir.close();
}
+
+ // LUCENE-5895:
+
+ /**
+ * Checks that a commit always carries an id, that a segment carries one when the
+ * default codec stores it, that CheckIndex prints both, and that
+ * StringHelper.randomId() does not repeat over many calls.
+ */
+ public void testIds() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
+ writer.addDocument(new Document());
+ writer.close();
+
+ SegmentInfos infos = new SegmentInfos();
+ infos.read(dir);
+ final String commitId = infos.getId();
+ assertNotNull(commitId);
+
+ final String segmentId = infos.info(0).info.getId();
+ final boolean segmentIdsSupported = defaultCodecSupportsSegmentIds();
+ if (segmentIdsSupported) {
+ assertNotNull(segmentId);
+ } else {
+ assertNull(segmentId);
+ }
+
+ // Capture what CheckIndex prints so the id lines can be inspected:
+ ByteArrayOutputStream captured = new ByteArrayOutputStream(1024);
+ CheckIndex checker = new CheckIndex(dir);
+ checker.setCrossCheckTermVectors(false);
+ checker.setInfoStream(new PrintStream(captured, false, IOUtils.UTF_8), false);
+ CheckIndex.Status status = checker.checkIndex(null);
+ String report = captured.toString(IOUtils.UTF_8);
+ // CheckIndex itself must have succeeded
+ assertTrue(report, status != null && status.clean);
+
+ // The commit id is written regardless of codec:
+ assertTrue("missing id=" + commitId + " in:\n" + report, report.contains("id=" + commitId));
+
+ // The segment id is printed as "null" when the codec does not store one:
+ final String expectedSegmentId = segmentIdsSupported ? segmentId : "null";
+ assertTrue("missing id=" + expectedSegmentId + " in:\n" + report, report.contains("id=" + expectedSegmentId));
+ dir.close();
+
+ // Set.add returns false on a duplicate, so one call both records and checks:
+ Set<String> seen = new HashSet<>();
+ for (int iter = 0; iter < 100000; iter++) {
+ String candidate = StringHelper.randomId();
+ assertTrue("id=" + candidate + " i=" + iter, seen.add(candidate));
+ }
+ }
}
+
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
index 7faf112565e..e6522d13264 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
@@ -55,6 +55,9 @@ import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat;
+import org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoFormat;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -1736,6 +1739,12 @@ public abstract class LuceneTestCase extends Assert {
return true;
}
+ /**
+ * Returns true if the default codec's segment info format is one of the
+ * formats that write a per-segment id (Lucene46 or SimpleText).
+ */
+ public static boolean defaultCodecSupportsSegmentIds() {
+ final SegmentInfoFormat format = Codec.getDefault().segmentInfoFormat();
+ if (format instanceof Lucene46SegmentInfoFormat) {
+ return true;
+ }
+ return format instanceof SimpleTextSegmentInfoFormat;
+ }
+
public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
assertReaderStatisticsEquals(info, leftReader, rightReader);
assertFieldsEquals(info, leftReader, MultiFields.getFields(leftReader), MultiFields.getFields(rightReader), true);