mirror of https://github.com/apache/lucene.git
LUCENE-5895: add id for each segment and commit to aid replication
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1619620 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
271af418c5
commit
1d5d73eefc
|
@ -98,6 +98,11 @@ Other
|
||||||
======================= Lucene 4.11.0 ======================
|
======================= Lucene 4.11.0 ======================
|
||||||
(No Changes)
|
(No Changes)
|
||||||
|
|
||||||
|
New Features
|
||||||
|
|
||||||
|
* LUCENE-5895: Lucene now stores a unique id per-segment and per-commit to aid
|
||||||
|
in accurate replication of index files (Robert Muir, Mike McCandless)
|
||||||
|
|
||||||
======================= Lucene 4.10.0 ======================
|
======================= Lucene 4.10.0 ======================
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
|
@ -17,15 +17,6 @@ package org.apache.lucene.codecs.simpletext;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -44,6 +35,16 @@ import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
|
||||||
|
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
|
||||||
|
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
|
||||||
|
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
|
||||||
|
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
|
||||||
|
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
|
||||||
|
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
|
||||||
|
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_ID;
|
||||||
|
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* reads plaintext segments files
|
* reads plaintext segments files
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -99,10 +100,14 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
|
||||||
files.add(fileName);
|
files.add(fileName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), SI_ID);
|
||||||
|
final String id = readString(SI_ID.length, scratch);
|
||||||
|
|
||||||
SimpleTextUtil.checkFooter(input);
|
SimpleTextUtil.checkFooter(input);
|
||||||
|
|
||||||
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
|
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
|
||||||
isCompoundFile, null, diagnostics);
|
isCompoundFile, null, diagnostics, id);
|
||||||
info.setFiles(files);
|
info.setFiles(files);
|
||||||
success = true;
|
success = true;
|
||||||
return info;
|
return info;
|
||||||
|
|
|
@ -48,6 +48,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
|
||||||
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
|
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
|
||||||
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
|
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
|
||||||
final static BytesRef SI_FILE = new BytesRef(" file ");
|
final static BytesRef SI_FILE = new BytesRef(" file ");
|
||||||
|
final static BytesRef SI_ID = new BytesRef(" id ");
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
|
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
|
||||||
|
@ -104,6 +105,10 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
|
||||||
SimpleTextUtil.writeNewline(output);
|
SimpleTextUtil.writeNewline(output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SimpleTextUtil.write(output, SI_ID);
|
||||||
|
SimpleTextUtil.write(output, si.getId(), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
|
||||||
SimpleTextUtil.writeChecksum(output, scratch);
|
SimpleTextUtil.writeChecksum(output, scratch);
|
||||||
success = true;
|
success = true;
|
||||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
|
||||||
* <p>
|
* <p>
|
||||||
* Files:
|
* Files:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
|
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Id, Footer
|
||||||
* </ul>
|
* </ul>
|
||||||
* </p>
|
* </p>
|
||||||
* Data types:
|
* Data types:
|
||||||
|
@ -44,6 +44,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
|
||||||
* <li>Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
|
* <li>Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
|
||||||
* <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li>
|
* <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li>
|
||||||
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
||||||
|
* <li>Id --> {@link DataOutput#writeString String}</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
* </p>
|
* </p>
|
||||||
* Field Descriptions:
|
* Field Descriptions:
|
||||||
|
@ -88,5 +89,6 @@ public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
|
||||||
static final String CODEC_NAME = "Lucene46SegmentInfo";
|
static final String CODEC_NAME = "Lucene46SegmentInfo";
|
||||||
static final int VERSION_START = 0;
|
static final int VERSION_START = 0;
|
||||||
static final int VERSION_CHECKSUM = 1;
|
static final int VERSION_CHECKSUM = 1;
|
||||||
static final int VERSION_CURRENT = VERSION_CHECKSUM;
|
static final int VERSION_ID = 2;
|
||||||
|
static final int VERSION_CURRENT = VERSION_ID;
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,13 +62,20 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader {
|
||||||
final Map<String,String> diagnostics = input.readStringStringMap();
|
final Map<String,String> diagnostics = input.readStringStringMap();
|
||||||
final Set<String> files = input.readStringSet();
|
final Set<String> files = input.readStringSet();
|
||||||
|
|
||||||
|
String id;
|
||||||
|
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_ID) {
|
||||||
|
id = input.readString();
|
||||||
|
} else {
|
||||||
|
id = null;
|
||||||
|
}
|
||||||
|
|
||||||
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
|
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
|
||||||
CodecUtil.checkFooter(input);
|
CodecUtil.checkFooter(input);
|
||||||
} else {
|
} else {
|
||||||
CodecUtil.checkEOF(input);
|
CodecUtil.checkEOF(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics);
|
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
|
||||||
si.setFiles(files);
|
si.setFiles(files);
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
|
|
|
@ -59,6 +59,7 @@ public class Lucene46SegmentInfoWriter extends SegmentInfoWriter {
|
||||||
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
||||||
output.writeStringStringMap(si.getDiagnostics());
|
output.writeStringStringMap(si.getDiagnostics());
|
||||||
output.writeStringSet(si.files());
|
output.writeStringSet(si.files());
|
||||||
|
output.writeString(si.getId());
|
||||||
CodecUtil.writeFooter(output);
|
CodecUtil.writeFooter(output);
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -484,7 +484,7 @@ public class CheckIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments
|
msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments
|
||||||
+ " " + versionString + " format=" + sFormat + userDataString);
|
+ " " + versionString + " id=" + sis.getId() + " format=" + sFormat + userDataString);
|
||||||
|
|
||||||
if (onlySegments != null) {
|
if (onlySegments != null) {
|
||||||
result.partial = true;
|
result.partial = true;
|
||||||
|
@ -535,6 +535,7 @@ public class CheckIndex {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
msg(infoStream, " version=" + (version == null ? "3.0" : version));
|
msg(infoStream, " version=" + (version == null ? "3.0" : version));
|
||||||
|
msg(infoStream, " id=" + info.info.getId());
|
||||||
final Codec codec = info.info.getCodec();
|
final Codec codec = info.info.getCodec();
|
||||||
msg(infoStream, " codec=" + codec);
|
msg(infoStream, " codec=" + codec);
|
||||||
segInfoStat.codec = codec;
|
segInfoStat.codec = codec;
|
||||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.lucene.util.InfoStream;
|
||||||
import org.apache.lucene.util.IntBlockPool;
|
import org.apache.lucene.util.IntBlockPool;
|
||||||
import org.apache.lucene.util.MutableBits;
|
import org.apache.lucene.util.MutableBits;
|
||||||
import org.apache.lucene.util.RamUsageEstimator;
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
|
import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
|
||||||
|
@ -178,7 +179,7 @@ class DocumentsWriterPerThread {
|
||||||
pendingUpdates.clear();
|
pendingUpdates.clear();
|
||||||
deleteSlice = deleteQueue.newSlice();
|
deleteSlice = deleteQueue.newSlice();
|
||||||
|
|
||||||
segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, null);
|
segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, null, StringHelper.randomId());
|
||||||
assert numDocsInRAM == 0;
|
assert numDocsInRAM == 0;
|
||||||
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
|
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
|
||||||
infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue);
|
infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue);
|
||||||
|
|
|
@ -63,6 +63,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.Constants;
|
import org.apache.lucene.util.Constants;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.InfoStream;
|
import org.apache.lucene.util.InfoStream;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.ThreadInterruptedException;
|
import org.apache.lucene.util.ThreadInterruptedException;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
@ -2566,7 +2567,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
|
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
|
||||||
|
|
||||||
SegmentInfo info = new SegmentInfo(directory, Version.LATEST, mergedName, -1,
|
SegmentInfo info = new SegmentInfo(directory, Version.LATEST, mergedName, -1,
|
||||||
false, codec, null);
|
false, codec, null, StringHelper.randomId());
|
||||||
|
|
||||||
SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
|
SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
|
||||||
MergeState.CheckAbort.NONE, globalFieldNumberMap,
|
MergeState.CheckAbort.NONE, globalFieldNumberMap,
|
||||||
|
@ -2667,7 +2668,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
// Same SI as before but we change directory and name
|
// Same SI as before but we change directory and name
|
||||||
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
|
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
|
||||||
info.info.getUseCompoundFile(), info.info.getCodec(),
|
info.info.getUseCompoundFile(), info.info.getCodec(),
|
||||||
info.info.getDiagnostics());
|
info.info.getDiagnostics(), StringHelper.randomId());
|
||||||
SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo,
|
SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo,
|
||||||
info.getDelCount(), info.getDelGen(), info.getFieldInfosGen(),
|
info.getDelCount(), info.getDelGen(), info.getFieldInfosGen(),
|
||||||
info.getDocValuesGen());
|
info.getDocValuesGen());
|
||||||
|
@ -3789,7 +3790,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
// ConcurrentMergePolicy we keep deterministic segment
|
// ConcurrentMergePolicy we keep deterministic segment
|
||||||
// names.
|
// names.
|
||||||
final String mergeSegmentName = newSegmentName();
|
final String mergeSegmentName = newSegmentName();
|
||||||
SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false, codec, null);
|
SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false, codec, null, StringHelper.randomId());
|
||||||
Map<String,String> details = new HashMap<>();
|
Map<String,String> details = new HashMap<>();
|
||||||
details.put("mergeMaxNumSegments", "" + merge.maxNumSegments);
|
details.put("mergeMaxNumSegments", "" + merge.maxNumSegments);
|
||||||
details.put("mergeFactor", Integer.toString(merge.segments.size()));
|
details.put("mergeFactor", Integer.toString(merge.segments.size()));
|
||||||
|
|
|
@ -27,7 +27,6 @@ import java.util.regex.Matcher;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.TrackingDirectoryWrapper;
|
import org.apache.lucene.store.TrackingDirectoryWrapper;
|
||||||
import org.apache.lucene.util.Constants;
|
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
@ -58,10 +57,13 @@ public final class SegmentInfo {
|
||||||
|
|
||||||
private boolean isCompoundFile;
|
private boolean isCompoundFile;
|
||||||
|
|
||||||
|
/** Id that uniquely identifies this segment. */
|
||||||
|
private final String id;
|
||||||
|
|
||||||
private Codec codec;
|
private Codec codec;
|
||||||
|
|
||||||
private Map<String,String> diagnostics;
|
private Map<String,String> diagnostics;
|
||||||
|
|
||||||
// Tracks the Lucene version this segment was created with, since 3.1. Null
|
// Tracks the Lucene version this segment was created with, since 3.1. Null
|
||||||
// indicates an older than 3.0 index, and it's used to detect a too old index.
|
// indicates an older than 3.0 index, and it's used to detect a too old index.
|
||||||
// The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
|
// The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
|
||||||
|
@ -79,13 +81,23 @@ public final class SegmentInfo {
|
||||||
return diagnostics;
|
return diagnostics;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a new complete SegmentInfo instance from
|
||||||
|
* input, with a null id (no id is generated for it).
|
||||||
|
*/
|
||||||
|
public SegmentInfo(Directory dir, Version version, String name, int docCount,
|
||||||
|
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics) {
|
||||||
|
this(dir, version, name, docCount, isCompoundFile, codec, diagnostics, null);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a new complete SegmentInfo instance from input.
|
* Construct a new complete SegmentInfo instance from input.
|
||||||
* <p>Note: this is public only to allow access from
|
* <p>Note: this is public only to allow access from
|
||||||
* the codecs package.</p>
|
* the codecs package.</p>
|
||||||
*/
|
*/
|
||||||
public SegmentInfo(Directory dir, Version version, String name, int docCount,
|
public SegmentInfo(Directory dir, Version version, String name, int docCount,
|
||||||
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics) {
|
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
|
||||||
|
String id) {
|
||||||
assert !(dir instanceof TrackingDirectoryWrapper);
|
assert !(dir instanceof TrackingDirectoryWrapper);
|
||||||
this.dir = dir;
|
this.dir = dir;
|
||||||
this.version = version;
|
this.version = version;
|
||||||
|
@ -94,6 +106,7 @@ public final class SegmentInfo {
|
||||||
this.isCompoundFile = isCompoundFile;
|
this.isCompoundFile = isCompoundFile;
|
||||||
this.codec = codec;
|
this.codec = codec;
|
||||||
this.diagnostics = diagnostics;
|
this.diagnostics = diagnostics;
|
||||||
|
this.id = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -212,6 +225,11 @@ public final class SegmentInfo {
|
||||||
return version;
|
return version;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Return the id that uniquely identifies this segment. */
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
private Set<String> setFiles;
|
private Set<String> setFiles;
|
||||||
|
|
||||||
/** Sets the files written for this segment. */
|
/** Sets the files written for this segment. */
|
||||||
|
|
|
@ -27,8 +27,8 @@ import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
@ -43,6 +43,7 @@ import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.NoSuchDirectoryException;
|
import org.apache.lucene.store.NoSuchDirectoryException;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A collection of segmentInfo objects with methods for operating on those
|
* A collection of segmentInfo objects with methods for operating on those
|
||||||
|
@ -137,6 +138,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
/** The file format version for the segments_N codec header, since 4.9+ */
|
/** The file format version for the segments_N codec header, since 4.9+ */
|
||||||
public static final int VERSION_49 = 3;
|
public static final int VERSION_49 = 3;
|
||||||
|
|
||||||
|
/** The file format version for the segments_N codec header, since 4.10+ */
|
||||||
|
public static final int VERSION_410 = 4;
|
||||||
|
|
||||||
// Used for the segments.gen file only!
|
// Used for the segments.gen file only!
|
||||||
// Whenever you add a new format, make it 1 smaller (negative version logic)!
|
// Whenever you add a new format, make it 1 smaller (negative version logic)!
|
||||||
private static final int FORMAT_SEGMENTS_GEN_47 = -2;
|
private static final int FORMAT_SEGMENTS_GEN_47 = -2;
|
||||||
|
@ -167,6 +171,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
*/
|
*/
|
||||||
private static PrintStream infoStream = null;
|
private static PrintStream infoStream = null;
|
||||||
|
|
||||||
|
/** Id for this commit; only written starting with Lucene 4.10 */
|
||||||
|
private String id;
|
||||||
|
|
||||||
/** Sole constructor. Typically you call this and then
|
/** Sole constructor. Typically you call this and then
|
||||||
* use {@link #read(Directory) or
|
* use {@link #read(Directory) or
|
||||||
* #read(Directory,String)} to populate each {@link
|
* #read(Directory,String)} to populate each {@link
|
||||||
|
@ -317,6 +324,12 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
nextGeneration);
|
nextGeneration);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Since Lucene 4.10, every commit (segments_N) writes a unique id. This will
|
||||||
|
* return that id, or null if this commit was pre-4.10. */
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read a particular segmentFileName. Note that this may
|
* Read a particular segmentFileName. Note that this may
|
||||||
* throw an IOException if a commit is in process.
|
* throw an IOException if a commit is in process.
|
||||||
|
@ -345,7 +358,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
|
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
|
||||||
}
|
}
|
||||||
// 4.0+
|
// 4.0+
|
||||||
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_49);
|
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_410);
|
||||||
version = input.readLong();
|
version = input.readLong();
|
||||||
counter = input.readInt();
|
counter = input.readInt();
|
||||||
int numSegments = input.readInt();
|
int numSegments = input.readInt();
|
||||||
|
@ -410,6 +423,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
add(siPerCommit);
|
add(siPerCommit);
|
||||||
}
|
}
|
||||||
userData = input.readStringStringMap();
|
userData = input.readStringStringMap();
|
||||||
|
if (format >= VERSION_410) {
|
||||||
|
id = input.readString();
|
||||||
|
}
|
||||||
|
|
||||||
if (format >= VERSION_48) {
|
if (format >= VERSION_48) {
|
||||||
CodecUtil.checkFooter(input);
|
CodecUtil.checkFooter(input);
|
||||||
|
@ -470,7 +486,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
|
|
||||||
try {
|
try {
|
||||||
segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
|
segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
|
||||||
CodecUtil.writeHeader(segnOutput, "segments", VERSION_49);
|
CodecUtil.writeHeader(segnOutput, "segments", VERSION_410);
|
||||||
segnOutput.writeLong(version);
|
segnOutput.writeLong(version);
|
||||||
segnOutput.writeInt(counter); // write counter
|
segnOutput.writeInt(counter); // write counter
|
||||||
segnOutput.writeInt(size()); // write infos
|
segnOutput.writeInt(size()); // write infos
|
||||||
|
@ -496,6 +512,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
assert si.dir == directory;
|
assert si.dir == directory;
|
||||||
}
|
}
|
||||||
segnOutput.writeStringStringMap(userData);
|
segnOutput.writeStringStringMap(userData);
|
||||||
|
segnOutput.writeString(StringHelper.randomId());
|
||||||
pendingSegnOutput = segnOutput;
|
pendingSegnOutput = segnOutput;
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -17,8 +17,9 @@ package org.apache.lucene.util;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.util.Comparator;
|
import java.math.BigInteger;
|
||||||
import java.util.StringTokenizer;
|
import java.util.Arrays;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Methods for manipulating strings.
|
* Methods for manipulating strings.
|
||||||
|
@ -228,4 +229,88 @@ public abstract class StringHelper {
|
||||||
public static int murmurhash3_x86_32(BytesRef bytes, int seed) {
|
public static int murmurhash3_x86_32(BytesRef bytes, int seed) {
|
||||||
return murmurhash3_x86_32(bytes.bytes, bytes.offset, bytes.length, seed);
|
return murmurhash3_x86_32(bytes.bytes, bytes.offset, bytes.length, seed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Holds 128 bit unsigned value:
|
||||||
|
private static BigInteger nextId;
|
||||||
|
private static final BigInteger idMask;
|
||||||
|
private static final Object idLock = new Object();
|
||||||
|
private static final String idPad = "00000000000000000000000000000000";
|
||||||
|
|
||||||
|
static {
|
||||||
|
byte[] maskBytes = new byte[16];
|
||||||
|
Arrays.fill(maskBytes, (byte) 0xff);
|
||||||
|
idMask = new BigInteger(maskBytes);
|
||||||
|
String prop = System.getProperty("tests.seed");
|
||||||
|
|
||||||
|
// State for xorshift128:
|
||||||
|
long x0;
|
||||||
|
long x1;
|
||||||
|
|
||||||
|
long seed;
|
||||||
|
if (prop != null) {
|
||||||
|
// So if there is a test failure that somehow relied on this id,
|
||||||
|
// we remain reproducible based on the test seed:
|
||||||
|
if (prop.length() > 8) {
|
||||||
|
prop = prop.substring(prop.length()-8);
|
||||||
|
}
|
||||||
|
x0 = Long.parseLong(prop, 16);
|
||||||
|
x1 = x0;
|
||||||
|
} else {
|
||||||
|
// "Ghetto randomess" from 3 different sources:
|
||||||
|
x0 = System.nanoTime();
|
||||||
|
x1 = StringHelper.class.hashCode() << 32;
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
// Properties can vary across JVM instances:
|
||||||
|
Properties p = System.getProperties();
|
||||||
|
for (String s: p.stringPropertyNames()) {
|
||||||
|
sb.append(s);
|
||||||
|
sb.append(p.getProperty(s));
|
||||||
|
}
|
||||||
|
x1 |= sb.toString().hashCode();
|
||||||
|
// TODO: maybe read from /dev/urandom when it's available?
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use a few iterations of xorshift128 to scatter the seed
|
||||||
|
// in case multiple Lucene instances starting up "near" the same
|
||||||
|
// nanoTime, since we use ++ (mod 2^128) for full period cycle:
|
||||||
|
for(int i=0;i<10;i++) {
|
||||||
|
long s1 = x0;
|
||||||
|
long s0 = x1;
|
||||||
|
x0 = s0;
|
||||||
|
s1 ^= s1 << 23; // a
|
||||||
|
x1 = s1 ^ s0 ^ (s1 >>> 17) ^ (s0 >>> 26); // b, c
|
||||||
|
}
|
||||||
|
|
||||||
|
// Concatentate bits of x0 and x1, as unsigned 128 bit integer:
|
||||||
|
nextId = new BigInteger(1, BigInteger.valueOf(x0).shiftLeft(64).or(BigInteger.valueOf(x1)).toByteArray());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Generates a non-cryptographic globally unique id. */
|
||||||
|
public static String randomId() {
|
||||||
|
|
||||||
|
// NOTE: we don't use Java's UUID.randomUUID() implementation here because:
|
||||||
|
//
|
||||||
|
// * It's overkill for our usage: it tries to be cryptographically
|
||||||
|
// secure, whereas for this use we don't care if someone can
|
||||||
|
// guess the IDs.
|
||||||
|
//
|
||||||
|
// * It uses SecureRandom, which on Linux can easily take a long time
|
||||||
|
// (I saw ~ 10 seconds just running a Lucene test) when entropy
|
||||||
|
// harvesting is falling behind.
|
||||||
|
//
|
||||||
|
// * It loses a few (6) bits to version and variant and it's not clear
|
||||||
|
// what impact that has on the period, whereas the simple ++ (mod 2^128)
|
||||||
|
// we use here is guaranteed to have the full period.
|
||||||
|
|
||||||
|
String id;
|
||||||
|
synchronized(idLock) {
|
||||||
|
id = nextId.toString(16);
|
||||||
|
nextId = nextId.add(BigInteger.ONE).and(idMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert id.length() <= 32: "id=" + id;
|
||||||
|
id = idPad.substring(id.length()) + id;
|
||||||
|
|
||||||
|
return id;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,11 +81,12 @@ import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.InfoStream;
|
import org.apache.lucene.util.InfoStream;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.SetOnce;
|
import org.apache.lucene.util.SetOnce;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
import org.apache.lucene.util.ThreadInterruptedException;
|
import org.apache.lucene.util.ThreadInterruptedException;
|
||||||
import org.apache.lucene.util.automaton.Automata;
|
import org.apache.lucene.util.automaton.Automata;
|
||||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
|
||||||
import org.apache.lucene.util.automaton.Automaton;
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
|
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class TestIndexWriter extends LuceneTestCase {
|
public class TestIndexWriter extends LuceneTestCase {
|
||||||
|
@ -2819,4 +2820,55 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
iw.close();
|
iw.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LUCENE-5895:
|
||||||
|
|
||||||
|
/** Make sure we see ids per segment and per commit. */
|
||||||
|
public void testIds() throws Exception {
|
||||||
|
Directory d = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
|
||||||
|
w.addDocument(new Document());
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
SegmentInfos sis = new SegmentInfos();
|
||||||
|
sis.read(d);
|
||||||
|
String id1 = sis.getId();
|
||||||
|
assertNotNull(id1);
|
||||||
|
|
||||||
|
String id2 = sis.info(0).info.getId();
|
||||||
|
if (defaultCodecSupportsSegmentIds()) {
|
||||||
|
assertNotNull(id2);
|
||||||
|
} else {
|
||||||
|
assertNull(id2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure CheckIndex includes id output:
|
||||||
|
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||||
|
CheckIndex checker = new CheckIndex(d);
|
||||||
|
checker.setCrossCheckTermVectors(false);
|
||||||
|
checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), false);
|
||||||
|
CheckIndex.Status indexStatus = checker.checkIndex(null);
|
||||||
|
String s = bos.toString(IOUtils.UTF_8);
|
||||||
|
// Make sure CheckIndex didn't fail
|
||||||
|
assertTrue(s, indexStatus != null && indexStatus.clean);
|
||||||
|
|
||||||
|
// Commit id is always stored:
|
||||||
|
assertTrue("missing id=" + id1 + " in:\n" + s, s.contains("id=" + id1));
|
||||||
|
|
||||||
|
// Per-segment id may or may not be stored depending on the codec:
|
||||||
|
if (defaultCodecSupportsSegmentIds()) {
|
||||||
|
assertTrue("missing id=" + id2 + " in:\n" + s, s.contains("id=" + id2));
|
||||||
|
} else {
|
||||||
|
assertTrue("missing id=null in:\n" + s, s.contains("id=null"));
|
||||||
|
}
|
||||||
|
d.close();
|
||||||
|
|
||||||
|
Set<String> ids = new HashSet<>();
|
||||||
|
for(int i=0;i<100000;i++) {
|
||||||
|
String id = StringHelper.randomId();
|
||||||
|
assertFalse("id=" + id + " i=" + i, ids.contains(id));
|
||||||
|
ids.add(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -55,6 +55,9 @@ import java.util.logging.Logger;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
|
import org.apache.lucene.codecs.lucene46.Lucene46SegmentInfoFormat;
|
||||||
|
import org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoFormat;
|
||||||
import org.apache.lucene.document.Field.Store;
|
import org.apache.lucene.document.Field.Store;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
|
@ -1736,6 +1739,12 @@ public abstract class LuceneTestCase extends Assert {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns true if the codec "supports" writing segment and commit ids. */
|
||||||
|
public static boolean defaultCodecSupportsSegmentIds() {
|
||||||
|
SegmentInfoFormat siFormat = Codec.getDefault().segmentInfoFormat();
|
||||||
|
return siFormat instanceof SimpleTextSegmentInfoFormat || siFormat instanceof Lucene46SegmentInfoFormat;
|
||||||
|
}
|
||||||
|
|
||||||
public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
public void assertReaderEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
|
||||||
assertReaderStatisticsEquals(info, leftReader, rightReader);
|
assertReaderStatisticsEquals(info, leftReader, rightReader);
|
||||||
assertFieldsEquals(info, leftReader, MultiFields.getFields(leftReader), MultiFields.getFields(rightReader), true);
|
assertFieldsEquals(info, leftReader, MultiFields.getFields(leftReader), MultiFields.getFields(rightReader), true);
|
||||||
|
|
Loading…
Reference in New Issue