mirror of https://github.com/apache/lucene.git
LUCENE-5985: add id for each segment and commit to aid replication
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1619620 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
271af418c5
commit
1d5d73eefc
|
@ -98,6 +98,11 @@ Other
|
|||
======================= Lucene 4.11.0 ======================
|
||||
(No Changes)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-5895: Lucene now stores a unique id per-segment and per-commit to aid
|
||||
in accurate replication of index files (Robert Muir, Mike McCandless)
|
||||
|
||||
======================= Lucene 4.10.0 ======================
|
||||
|
||||
New Features
|
||||
|
|
|
@ -17,15 +17,6 @@ package org.apache.lucene.codecs.simpletext;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashMap;
|
||||
|
@ -44,6 +35,16 @@ import org.apache.lucene.util.IOUtils;
|
|||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_ID;
|
||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
|
||||
|
||||
/**
|
||||
* reads plaintext segments files
|
||||
* <p>
|
||||
|
@ -99,10 +100,14 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
|
|||
files.add(fileName);
|
||||
}
|
||||
|
||||
SimpleTextUtil.readLine(input, scratch);
|
||||
assert StringHelper.startsWith(scratch.get(), SI_ID);
|
||||
final String id = readString(SI_ID.length, scratch);
|
||||
|
||||
SimpleTextUtil.checkFooter(input);
|
||||
|
||||
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
|
||||
isCompoundFile, null, diagnostics);
|
||||
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
|
||||
isCompoundFile, null, diagnostics, id);
|
||||
info.setFiles(files);
|
||||
success = true;
|
||||
return info;
|
||||
|
|
|
@ -48,6 +48,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
|
|||
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
|
||||
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
|
||||
final static BytesRef SI_FILE = new BytesRef(" file ");
|
||||
final static BytesRef SI_ID = new BytesRef(" id ");
|
||||
|
||||
@Override
|
||||
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
|
||||
|
@ -104,6 +105,10 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
|
|||
SimpleTextUtil.writeNewline(output);
|
||||
}
|
||||
}
|
||||
|
||||
SimpleTextUtil.write(output, SI_ID);
|
||||
SimpleTextUtil.write(output, si.getId(), scratch);
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
|
||||
SimpleTextUtil.writeChecksum(output, scratch);
|
||||
success = true;
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
|
|||
* <p>
|
||||
* Files:
|
||||
* <ul>
|
||||
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
|
||||
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Id, Footer
|
||||
* </ul>
|
||||
* </p>
|
||||
* Data types:
|
||||
|
@ -44,6 +44,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
|
|||
* <li>Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
|
||||
* <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li>
|
||||
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
||||
* <li>Id --> {@link DataOutput#writeString String}</li>
|
||||
* </ul>
|
||||
* </p>
|
||||
* Field Descriptions:
|
||||
|
@ -88,5 +89,6 @@ public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
|
|||
static final String CODEC_NAME = "Lucene46SegmentInfo";
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CHECKSUM = 1;
|
||||
static final int VERSION_CURRENT = VERSION_CHECKSUM;
|
||||
static final int VERSION_ID = 2;
|
||||
static final int VERSION_CURRENT = VERSION_ID;
|
||||
}
|
||||
|
|
|
@ -62,13 +62,20 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader {
|
|||
final Map<String,String> diagnostics = input.readStringStringMap();
|
||||
final Set<String> files = input.readStringSet();
|
||||
|
||||
String id;
|
||||
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_ID) {
|
||||
id = input.readString();
|
||||
} else {
|
||||
id = null;
|
||||
}
|
||||
|
||||
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
|
||||
CodecUtil.checkFooter(input);
|
||||
} else {
|
||||
CodecUtil.checkEOF(input);
|
||||
}
|
||||
|
||||
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);
|
||||
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
|
||||
si.setFiles(files);
|
||||
|
||||
success = true;
|
||||
|
|
|
@ -59,6 +59,7 @@ public class Lucene46SegmentInfoWriter extends SegmentInfoWriter {
|
|||
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
||||
output.writeStringStringMap(si.getDiagnostics());
|
||||
output.writeStringSet(si.files());
|
||||
output.writeString(si.getId());
|
||||
CodecUtil.writeFooter(output);
|
||||
success = true;
|
||||
} finally {
|
||||
|
|
|
@ -484,7 +484,7 @@ public class CheckIndex {
|
|||
}
|
||||
|
||||
msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments
|
||||
+ " " + versionString + " format=" + sFormat + userDataString);
|
||||
+ " " + versionString + " id=" + sis.getId() + " format=" + sFormat + userDataString);
|
||||
|
||||
if (onlySegments != null) {
|
||||
result.partial = true;
|
||||
|
@ -535,6 +535,7 @@ public class CheckIndex {
|
|||
|
||||
try {
|
||||
msg(infoStream, " version=" + (version == null ? "3.0" : version));
|
||||
msg(infoStream, " id=" + info.info.getId());
|
||||
final Codec codec = info.info.getCodec();
|
||||
msg(infoStream, " codec=" + codec);
|
||||
segInfoStat.codec = codec;
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.lucene.util.InfoStream;
|
|||
import org.apache.lucene.util.IntBlockPool;
|
||||
import org.apache.lucene.util.MutableBits;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
|
||||
|
@ -178,7 +179,7 @@ class DocumentsWriterPerThread {
|
|||
pendingUpdates.clear();
|
||||
deleteSlice = deleteQueue.newSlice();
|
||||
|
||||
segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, null);
|
||||
segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, null, StringHelper.randomId());
|
||||
assert numDocsInRAM == 0;
|
||||
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
|
||||
infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue);
|
||||
|
|
|
@ -63,6 +63,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
|
@ -2566,7 +2567,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
|
||||
|
||||
SegmentInfo info = new SegmentInfo(directory, Version.LATEST, mergedName, -1,
|
||||
false, codec, null);
|
||||
false, codec, null, StringHelper.randomId());
|
||||
|
||||
SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
|
||||
MergeState.CheckAbort.NONE, globalFieldNumberMap,
|
||||
|
@ -2667,7 +2668,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
// Same SI as before but we change directory and name
|
||||
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
|
||||
info.info.getUseCompoundFile(), info.info.getCodec(),
|
||||
info.info.getDiagnostics());
|
||||
info.info.getDiagnostics(), StringHelper.randomId());
|
||||
SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo,
|
||||
info.getDelCount(), info.getDelGen(), info.getFieldInfosGen(),
|
||||
info.getDocValuesGen());
|
||||
|
@ -3789,7 +3790,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
|||
// ConcurrentMergePolicy we keep deterministic segment
|
||||
// names.
|
||||
final String mergeSegmentName = newSegmentName();
|
||||
SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false, codec, null);
|
||||
SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false, codec, null, StringHelper.randomId());
|
||||
Map<String,String> details = new HashMap<>();
|
||||
details.put("mergeMaxNumSegments", "" + merge.maxNumSegments);
|
||||
details.put("mergeFactor", Integer.toString(merge.segments.size()));
|
||||
|
|
|
@ -27,7 +27,6 @@ import java.util.regex.Matcher;
|
|||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.TrackingDirectoryWrapper;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
|
@ -58,10 +57,13 @@ public final class SegmentInfo {
|
|||
|
||||
private boolean isCompoundFile;
|
||||
|
||||
/** Id that uniquely identifies this segment. */
|
||||
private final String id;
|
||||
|
||||
private Codec codec;
|
||||
|
||||
private Map<String,String> diagnostics;
|
||||
|
||||
|
||||
// Tracks the Lucene version this segment was created with, since 3.1. Null
|
||||
// indicates an older than 3.0 index, and it's used to detect a too old index.
|
||||
// The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
|
||||
|
@ -79,13 +81,23 @@ public final class SegmentInfo {
|
|||
return diagnostics;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new complete SegmentInfo instance from
|
||||
* input, with a newly generated random id.
|
||||
*/
|
||||
public SegmentInfo(Directory dir, Version version, String name, int docCount,
|
||||
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics) {
|
||||
this(dir, version, name, docCount, isCompoundFile, codec, diagnostics, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new complete SegmentInfo instance from input.
|
||||
* <p>Note: this is public only to allow access from
|
||||
* the codecs package.</p>
|
||||
*/
|
||||
public SegmentInfo(Directory dir, Version version, String name, int docCount,
|
||||
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics) {
|
||||
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
|
||||
String id) {
|
||||
assert !(dir instanceof TrackingDirectoryWrapper);
|
||||
this.dir = dir;
|
||||
this.version = version;
|
||||
|
@ -94,6 +106,7 @@ public final class SegmentInfo {
|
|||
this.isCompoundFile = isCompoundFile;
|
||||
this.codec = codec;
|
||||
this.diagnostics = diagnostics;
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -212,6 +225,11 @@ public final class SegmentInfo {
|
|||
return version;
|
||||
}
|
||||
|
||||
/** Return the id that uniquely identifies this segment. */
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
private Set<String> setFiles;
|
||||
|
||||
/** Sets the files written for this segment. */
|
||||
|
|
|
@ -27,8 +27,8 @@ import java.util.HashMap;
|
|||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
|
@ -43,6 +43,7 @@ import org.apache.lucene.store.IOContext;
|
|||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.NoSuchDirectoryException;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
* A collection of segmentInfo objects with methods for operating on those
|
||||
|
@ -137,6 +138,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
/** The file format version for the segments_N codec header, since 4.9+ */
|
||||
public static final int VERSION_49 = 3;
|
||||
|
||||
/** The file format version for the segments_N codec header, since 4.10+ */
|
||||
public static final int VERSION_410 = 4;
|
||||
|
||||
// Used for the segments.gen file only!
|
||||
// Whenever you add a new format, make it 1 smaller (negative version logic)!
|
||||
private static final int FORMAT_SEGMENTS_GEN_47 = -2;
|
||||
|
@ -167,6 +171,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
*/
|
||||
private static PrintStream infoStream = null;
|
||||
|
||||
/** Id for this commit; only written starting with Lucene 4.10 */
|
||||
private String id;
|
||||
|
||||
/** Sole constructor. Typically you call this and then
|
||||
* use {@link #read(Directory) or
|
||||
* #read(Directory,String)} to populate each {@link
|
||||
|
@ -317,6 +324,12 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
nextGeneration);
|
||||
}
|
||||
|
||||
/** Since Lucene 4.10, every commit (segments_N) writes a unique id. This will
|
||||
* return that id, or null if this commit was pre-4.10. */
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a particular segmentFileName. Note that this may
|
||||
* throw an IOException if a commit is in process.
|
||||
|
@ -345,7 +358,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
|
||||
}
|
||||
// 4.0+
|
||||
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_49);
|
||||
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_410);
|
||||
version = input.readLong();
|
||||
counter = input.readInt();
|
||||
int numSegments = input.readInt();
|
||||
|
@ -410,6 +423,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
add(siPerCommit);
|
||||
}
|
||||
userData = input.readStringStringMap();
|
||||
if (format >= VERSION_410) {
|
||||
id = input.readString();
|
||||
}
|
||||
|
||||
if (format >= VERSION_48) {
|
||||
CodecUtil.checkFooter(input);
|
||||
|
@ -470,7 +486,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
|
||||
try {
|
||||
segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
|
||||
CodecUtil.writeHeader(segnOutput, "segments", VERSION_49);
|
||||
CodecUtil.writeHeader(segnOutput, "segments", VERSION_410);
|
||||
segnOutput.writeLong(version);
|
||||
segnOutput.writeInt(counter); // write counter
|
||||
segnOutput.writeInt(size()); // write infos
|
||||
|
@ -496,6 +512,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
assert si.dir == directory;
|
||||
}
|
||||
segnOutput.writeStringStringMap(userData);
|
||||
segnOutput.writeString(StringHelper.randomId());
|
||||
pendingSegnOutput = segnOutput;
|
||||
success = true;
|
||||
} finally {
|
||||
|
|
|
@ -17,8 +17,9 @@ package org.apache.lucene.util;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.StringTokenizer;
|
||||
import java.math.BigInteger;
|
||||
import java.util.Arrays;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Methods for manipulating strings.
|
||||
|
@ -228,4 +229,88 @@ public abstract class StringHelper {
|
|||
public static int murmurhash3_x86_32(BytesRef bytes, int seed) {
|
||||
return murmurhash3_x86_32(bytes.bytes, bytes.offset, bytes.length, seed);
|
||||
}
|
||||
|
||||
// Holds 128 bit unsigned value:
|
||||
private static BigInteger nextId;
|
||||
private static final BigInteger idMask;
|
||||
private static final Object idLock = new Object();
|
||||
private static final String idPad = "00000000000000000000000000000000";
|
||||
|
||||
static {
|
||||
byte[] maskBytes = new byte[16];
|
||||
Arrays.fill(maskBytes, (byte) 0xff);
|
||||
idMask = new BigInteger(maskBytes);
|
||||
String prop = System.getProperty("tests.seed");
|
||||
|
||||
// State for xorshift128:
|
||||
long x0;
|
||||
long x1;
|
||||
|
||||
long seed;
|
||||
if (prop != null) {
|
||||
// So if there is a test failure that somehow relied on this id,
|
||||
// we remain reproducible based on the test seed:
|
||||
if (prop.length() > 8) {
|
||||
prop = prop.substring(prop.length()-8);
|
||||
}
|
||||
x0 = Long.parseLong(prop, 16);
|
||||
x1 = x0;
|
||||
} else {
|
||||
// "Ghetto randomess" from 3 different sources:
|
||||
x0 = System.nanoTime();
|
||||
x1 = StringHelper.class.hashCode() << 32;
|
||||
StringBuilder sb = new StringBuilder();
|
||||
// Properties can vary across JVM instances:
|
||||
Properties p = System.getProperties();
|
||||
for (String s: p.stringPropertyNames()) {
|
||||
sb.append(s);
|
||||
sb.append(p.getProperty(s));
|
||||
}
|
||||
x1 |= sb.toString().hashCode();
|
||||
// TODO: maybe read from /dev/urandom when it's available?
|
||||
}
|
||||
|
||||
// Use a few iterations of xorshift128 to scatter the seed
|
||||
// in case multiple Lucene instances starting up "near" the same
|
||||
// nanoTime, since we use ++ (mod 2^128) for full period cycle:
|
||||
for(int i=0;i<10;i++) {
|
||||
long s1 = x0;
|
||||
long s0 = x1;
|
||||
x0 = s0;
|
||||
s1 ^= s1 << 23; // a
|
||||
x1 = s1 ^ s0 ^ (s1 >>> 17) ^ (s0 >>> 26); // b, c
|
||||
}
|
||||
|
||||
// Concatentate bits of x0 and x1, as unsigned 128 bit integer:
|
||||
nextId = new BigInteger(1, BigInteger.valueOf(x0).shiftLeft(64).or(BigInteger.valueOf(x1)).toByteArray());
|
||||
}
|
||||
|
||||
/** Generates a non-cryptographic globally unique id. */
|
||||
public static String randomId() {
|
||||
|
||||
// NOTE: we don't use Java's UUID.randomUUID() implementation here because:
|
||||
//
|
||||
// * It's overkill for our usage: it tries to be cryptographically
|
||||
// secure, whereas for this use we don't care if someone can
|
||||
// guess the IDs.
|
||||
//
|
||||
// * It uses SecureRandom, which on Linux can easily take a long time
|
||||
// (I saw ~ 10 seconds just running a Lucene test) when entropy
|
||||
// harvesting is falling behind.
|
||||
//
|
||||
// * It loses a few (6) bits to version and variant and it's not clear
|
||||
// what impact that has on the period, whereas the simple ++ (mod 2^128)
|
||||
// we use here is guaranteed to have the full period.
|
||||
|
||||
String id;
|
||||
synchronized(idLock) {
|
||||
id = nextId.toString(16);
|
||||
nextId = nextId.add(BigInteger.ONE).and(idMask);
|
||||
}
|
||||
|
||||
assert id.length() <= 32: "id=" + id;
|
||||
id = idPad.substring(id.length()) + id;
|
||||
|
||||
return id;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -81,11 +81,12 @@ import org.apache.lucene.util.IOUtils;
|
|||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.SetOnce;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestIndexWriter extends LuceneTestCase {
|
||||
|
@ -2819,4 +2820,55 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
iw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// LUCENE-5895:
|
||||
|
||||
/** Make sure we see ids per segment and per commit. */
|
||||
public void testIds() throws Exception {
|
||||
Directory d = newDirectory();
|
||||
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||
w.addDocument(new Document());
|
||||
w.close();
|
||||
|
||||
SegmentInfos sis = new SegmentInfos();
|
||||
sis.read(d);
|
||||
String id1 = sis.getId();
|
||||
assertNotNull(id1);
|
||||
|
||||
String id2 = sis.info(0).info.getId();
|
||||
if (defaultCodecSupportsSegmentIds()) {
|
||||
assertNotNull(id2);
|
||||
} else {
|
||||
assertNull(id2);
|
||||
}
|
||||
|
||||
// Make sure CheckIndex includes id output:
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||
CheckIndex checker = new CheckIndex(d);
|
||||
checker.setCrossCheckTermVectors(false);
|
||||
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), false);
|
||||
CheckIndex.Status indexStatus = checker.checkIndex(null);
|
||||
String s = bos.toString(IOUtils.UTF_8);
|
||||
// Make sure CheckIndex didn't fail
|
||||
assertTrue(s, indexStatus != null && indexStatus.clean);
|
||||
|
||||
// Commit id is always stored:
|
||||
assertTrue("missing id=" + id1 + " in:\n" + s, s.contains("id=" + id1));
|
||||
|
||||
// Per-segment id may or may not be stored depending on the codec:
|
||||
if (defaultCodecSupportsSegmentIds()) {
|
||||
assertTrue("missing id=" + id2 + " in:\n" + s, s.contains("id=" + id2));
|
||||
} else {
|
||||
assertTrue("missing id=null in:\n" + s, s.contains("id=null"));
|
||||
}
|
||||
d.close();
|
||||
|
||||
Set<String> ids = new HashSet<>();
|
||||
for(int i=0;i<100000;i++) {
|
||||
String id = StringHelper.randomId();
|
||||
assertFalse("id=" + id + " i=" + i, ids.contains(id));
|
||||
ids.add(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,6 +55,9 @@ import java.util.logging.Logger;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoFormat;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
@ -1736,6 +1739,12 @@ public abstract class LuceneTestCase extends Assert {
|
|||
return true;
|
||||
}
|
||||
|
||||
/** Returns true if the codec "supports" writing segment and commit ids. */
|
||||
public static boolean defaultCodecSupportsSegmentIds() {
|
||||
SegmentInfoFormat siFormat = Codec.getDefault().segmentInfoFormat();
|
||||
return siFormat instanceof SimpleTextSegmentInfoFormat || siFormat instanceof Lucene46SegmentInfoFormat;
|
||||
}
|
||||
|
||||
public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
||||
assertReaderStatisticsEquals(info, leftReader, rightReader);
|
||||
assertFieldsEquals(info, leftReader, MultiFields.getFields(leftReader), MultiFields.getFields(rightReader), true);
|
||||
|
|
Loading…
Reference in New Issue