mirror of https://github.com/apache/lucene.git
LUCENE-5969, LUCENE-5895: fix sign bit bugs in segment/commit IDs, use byte[] representation
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1627714 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
afee9af13f
commit
88648b3a9c
|
@ -31,7 +31,7 @@ import org.apache.lucene.store.DataOutput; // javadocs
|
||||||
* <p>
|
* <p>
|
||||||
* Files:
|
* Files:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Id, Footer
|
* <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
|
||||||
* </ul>
|
* </ul>
|
||||||
* </p>
|
* </p>
|
||||||
* Data types:
|
* Data types:
|
||||||
|
@ -44,7 +44,6 @@ import org.apache.lucene.store.DataOutput; // javadocs
|
||||||
* <li>Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
|
* <li>Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
|
||||||
* <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li>
|
* <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li>
|
||||||
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
||||||
* <li>Id --> {@link DataOutput#writeString String}</li>
|
|
||||||
* </ul>
|
* </ul>
|
||||||
* </p>
|
* </p>
|
||||||
* Field Descriptions:
|
* Field Descriptions:
|
||||||
|
@ -88,6 +87,5 @@ public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
|
||||||
static final String CODEC_NAME = "Lucene46SegmentInfo";
|
static final String CODEC_NAME = "Lucene46SegmentInfo";
|
||||||
static final int VERSION_START = 0;
|
static final int VERSION_START = 0;
|
||||||
static final int VERSION_CHECKSUM = 1;
|
static final int VERSION_CHECKSUM = 1;
|
||||||
static final int VERSION_ID = 2;
|
static final int VERSION_CURRENT = VERSION_CHECKSUM;
|
||||||
static final int VERSION_CURRENT = VERSION_ID;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,13 +65,6 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader {
|
||||||
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
|
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
|
||||||
final Map<String,String> diagnostics = input.readStringStringMap();
|
final Map<String,String> diagnostics = input.readStringStringMap();
|
||||||
final Set<String> files = input.readStringSet();
|
final Set<String> files = input.readStringSet();
|
||||||
|
|
||||||
String id;
|
|
||||||
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_ID) {
|
|
||||||
id = input.readString();
|
|
||||||
} else {
|
|
||||||
id = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
|
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
|
||||||
CodecUtil.checkFooter(input);
|
CodecUtil.checkFooter(input);
|
||||||
|
@ -79,7 +72,7 @@ public class Lucene46SegmentInfoReader extends SegmentInfoReader {
|
||||||
CodecUtil.checkEOF(input);
|
CodecUtil.checkEOF(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
|
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
|
||||||
si.setFiles(files);
|
si.setFiles(files);
|
||||||
|
|
||||||
return si;
|
return si;
|
||||||
|
|
|
@ -64,7 +64,6 @@ public class Lucene46SegmentInfoWriter extends SegmentInfoWriter {
|
||||||
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
||||||
output.writeStringStringMap(si.getDiagnostics());
|
output.writeStringStringMap(si.getDiagnostics());
|
||||||
output.writeStringSet(si.files());
|
output.writeStringSet(si.files());
|
||||||
output.writeString(si.getId());
|
|
||||||
CodecUtil.writeFooter(output);
|
CodecUtil.writeFooter(output);
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.codecs.simpletext;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -109,7 +110,7 @@ public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
assert StringHelper.startsWith(scratch.get(), SI_ID);
|
assert StringHelper.startsWith(scratch.get(), SI_ID);
|
||||||
final String id = readString(SI_ID.length, scratch);
|
final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length());
|
||||||
|
|
||||||
SimpleTextUtil.checkFooter(input);
|
SimpleTextUtil.checkFooter(input);
|
||||||
|
|
||||||
|
|
|
@ -107,7 +107,7 @@ public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
SimpleTextUtil.write(output, SI_ID);
|
SimpleTextUtil.write(output, SI_ID);
|
||||||
SimpleTextUtil.write(output, si.getId(), scratch);
|
SimpleTextUtil.write(output, new BytesRef(si.getId()));
|
||||||
SimpleTextUtil.writeNewline(output);
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
|
||||||
SimpleTextUtil.writeChecksum(output, scratch);
|
SimpleTextUtil.writeChecksum(output, scratch);
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.index.IndexFormatTooNewException;
|
import org.apache.lucene.index.IndexFormatTooNewException;
|
||||||
|
@ -31,6 +32,7 @@ import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility class for reading and writing versioned headers.
|
* Utility class for reading and writing versioned headers.
|
||||||
|
@ -94,12 +96,12 @@ public final class CodecUtil {
|
||||||
* Writes a codec header for a per-segment file, which records a string to
|
* Writes a codec header for a per-segment file, which records a string to
|
||||||
* identify the file, a version number, and the unique ID of the segment.
|
* identify the file, a version number, and the unique ID of the segment.
|
||||||
* This header can be parsed and validated with
|
* This header can be parsed and validated with
|
||||||
* {@link #checkSegmentHeader(DataInput, String, int, int, String) checkSegmentHeader()}.
|
* {@link #checkSegmentHeader(DataInput, String, int, int, byte[]) checkSegmentHeader()}.
|
||||||
* <p>
|
* <p>
|
||||||
* CodecSegmentHeader --> CodecHeader,SegmentID
|
* CodecSegmentHeader --> CodecHeader,SegmentID
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>CodecHeader --> {@link #writeHeader}
|
* <li>CodecHeader --> {@link #writeHeader}
|
||||||
* <li>SegmentID --> {@link DataOutput#writeString String}.
|
* <li>SegmentID --> {@link DataOutput#writeByte byte}<sup>16</sup>.
|
||||||
* Unique identifier for the segment.
|
* Unique identifier for the segment.
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -113,13 +115,15 @@ public final class CodecUtil {
|
||||||
* @param segmentID Unique identifier for the segment
|
* @param segmentID Unique identifier for the segment
|
||||||
* @param version Version number
|
* @param version Version number
|
||||||
* @throws IOException If there is an I/O error writing to the underlying medium.
|
* @throws IOException If there is an I/O error writing to the underlying medium.
|
||||||
* @throws IllegalArgumentException If the codec name is not simple ASCII, or is more than 127 characters in length
|
* @throws IllegalArgumentException If the codec name is not simple ASCII, or
|
||||||
|
* is more than 127 characters in length, or if segmentID is invalid.
|
||||||
*/
|
*/
|
||||||
// nocommit: fix javadocs, add segmentLength()
|
public static void writeSegmentHeader(DataOutput out, String codec, int version, byte[] segmentID) throws IOException {
|
||||||
public static void writeSegmentHeader(DataOutput out, String codec, int version, String segmentID) throws IOException {
|
if (segmentID.length != StringHelper.ID_LENGTH) {
|
||||||
|
throw new IllegalArgumentException("Invalid id: " + StringHelper.idToString(segmentID));
|
||||||
|
}
|
||||||
writeHeader(out, codec, version);
|
writeHeader(out, codec, version);
|
||||||
// nocommit: improve encoding of this ID
|
out.writeBytes(segmentID, 0, segmentID.length);
|
||||||
out.writeString(segmentID);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -132,6 +136,17 @@ public final class CodecUtil {
|
||||||
public static int headerLength(String codec) {
|
public static int headerLength(String codec) {
|
||||||
return 9+codec.length();
|
return 9+codec.length();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the length of a segment header.
|
||||||
|
*
|
||||||
|
* @param codec Codec name.
|
||||||
|
* @return length of the entire segment header.
|
||||||
|
* @see #writeSegmentHeader(DataOutput, String, int, byte[])
|
||||||
|
*/
|
||||||
|
public static int segmentHeaderLength(String codec) {
|
||||||
|
return headerLength(codec) + StringHelper.ID_LENGTH;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads and validates a header previously written with
|
* Reads and validates a header previously written with
|
||||||
|
@ -192,7 +207,7 @@ public final class CodecUtil {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads and validates a header previously written with
|
* Reads and validates a header previously written with
|
||||||
* {@link #writeSegmentHeader(DataOutput, String, int, String)}.
|
* {@link #writeSegmentHeader(DataOutput, String, int, byte[])}.
|
||||||
* <p>
|
* <p>
|
||||||
* When reading a file, supply the expected <code>codec</code>,
|
* When reading a file, supply the expected <code>codec</code>,
|
||||||
* expected version range (<code>minVersion to maxVersion</code>),
|
* expected version range (<code>minVersion to maxVersion</code>),
|
||||||
|
@ -219,13 +234,15 @@ public final class CodecUtil {
|
||||||
* @throws IndexFormatTooNewException If the actual version is greater
|
* @throws IndexFormatTooNewException If the actual version is greater
|
||||||
* than <code>maxVersion</code>.
|
* than <code>maxVersion</code>.
|
||||||
* @throws IOException If there is an I/O error reading from the underlying medium.
|
* @throws IOException If there is an I/O error reading from the underlying medium.
|
||||||
* @see #writeSegmentHeader(DataOutput, String, int, String)
|
* @see #writeSegmentHeader(DataOutput, String, int, byte[])
|
||||||
*/
|
*/
|
||||||
public static int checkSegmentHeader(DataInput in, String codec, int minVersion, int maxVersion, String segmentID) throws IOException {
|
public static int checkSegmentHeader(DataInput in, String codec, int minVersion, int maxVersion, byte[] segmentID) throws IOException {
|
||||||
int version = checkHeader(in, codec, minVersion, maxVersion);
|
int version = checkHeader(in, codec, minVersion, maxVersion);
|
||||||
String id = in.readString();
|
byte id[] = new byte[StringHelper.ID_LENGTH];
|
||||||
if (!id.equals(segmentID)) {
|
in.readBytes(id, 0, id.length);
|
||||||
throw new CorruptIndexException("file mismatch, expected segment id=" + segmentID + ", got=" + id, in);
|
if (!Arrays.equals(id, segmentID)) {
|
||||||
|
throw new CorruptIndexException("file mismatch, expected segment id=" + StringHelper.idToString(segmentID)
|
||||||
|
+ ", got=" + StringHelper.idToString(id), in);
|
||||||
}
|
}
|
||||||
return version;
|
return version;
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.store.ChecksumIndexInput;
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -69,7 +70,8 @@ public class Lucene50SegmentInfoReader extends SegmentInfoReader {
|
||||||
final Map<String,String> diagnostics = input.readStringStringMap();
|
final Map<String,String> diagnostics = input.readStringStringMap();
|
||||||
final Set<String> files = input.readStringSet();
|
final Set<String> files = input.readStringSet();
|
||||||
|
|
||||||
String id = input.readString();
|
byte[] id = new byte[StringHelper.ID_LENGTH];
|
||||||
|
input.readBytes(id, 0, id.length);
|
||||||
|
|
||||||
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
|
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
|
||||||
si.setFiles(files);
|
si.setFiles(files);
|
||||||
|
|
|
@ -64,7 +64,8 @@ public class Lucene50SegmentInfoWriter extends SegmentInfoWriter {
|
||||||
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
||||||
output.writeStringStringMap(si.getDiagnostics());
|
output.writeStringStringMap(si.getDiagnostics());
|
||||||
output.writeStringSet(si.files());
|
output.writeStringSet(si.files());
|
||||||
output.writeString(si.getId());
|
byte[] id = si.getId();
|
||||||
|
output.writeBytes(id, 0, id.length);
|
||||||
CodecUtil.writeFooter(output);
|
CodecUtil.writeFooter(output);
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -47,6 +47,7 @@ import org.apache.lucene.util.CommandLineUtil;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.LongBitSet;
|
import org.apache.lucene.util.LongBitSet;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
|
||||||
|
@ -514,7 +515,7 @@ public class CheckIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments
|
msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments
|
||||||
+ " " + versionString + " id=" + sis.getId() + " format=" + sFormat + userDataString);
|
+ " " + versionString + " id=" + StringHelper.idToString(sis.getId()) + " format=" + sFormat + userDataString);
|
||||||
|
|
||||||
if (onlySegments != null) {
|
if (onlySegments != null) {
|
||||||
result.partial = true;
|
result.partial = true;
|
||||||
|
@ -565,7 +566,7 @@ public class CheckIndex {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
msg(infoStream, " version=" + (version == null ? "3.0" : version));
|
msg(infoStream, " version=" + (version == null ? "3.0" : version));
|
||||||
msg(infoStream, " id=" + info.info.getId());
|
msg(infoStream, " id=" + StringHelper.idToString(info.info.getId()));
|
||||||
final Codec codec = info.info.getCodec();
|
final Codec codec = info.info.getCodec();
|
||||||
msg(infoStream, " codec=" + codec);
|
msg(infoStream, " codec=" + codec);
|
||||||
segInfoStat.codec = codec;
|
segInfoStat.codec = codec;
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.index;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
@ -59,7 +60,7 @@ public final class SegmentInfo {
|
||||||
private boolean isCompoundFile;
|
private boolean isCompoundFile;
|
||||||
|
|
||||||
/** Id that uniquely identifies this segment. */
|
/** Id that uniquely identifies this segment. */
|
||||||
private final String id;
|
private final byte[] id;
|
||||||
|
|
||||||
private Codec codec;
|
private Codec codec;
|
||||||
|
|
||||||
|
@ -89,7 +90,7 @@ public final class SegmentInfo {
|
||||||
*/
|
*/
|
||||||
public SegmentInfo(Directory dir, Version version, String name, int docCount,
|
public SegmentInfo(Directory dir, Version version, String name, int docCount,
|
||||||
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
|
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
|
||||||
String id) {
|
byte[] id) {
|
||||||
assert !(dir instanceof TrackingDirectoryWrapper);
|
assert !(dir instanceof TrackingDirectoryWrapper);
|
||||||
this.dir = dir;
|
this.dir = dir;
|
||||||
this.version = version;
|
this.version = version;
|
||||||
|
@ -99,6 +100,9 @@ public final class SegmentInfo {
|
||||||
this.codec = codec;
|
this.codec = codec;
|
||||||
this.diagnostics = diagnostics;
|
this.diagnostics = diagnostics;
|
||||||
this.id = id;
|
this.id = id;
|
||||||
|
if (id != null && id.length != StringHelper.ID_LENGTH) {
|
||||||
|
throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -218,8 +222,8 @@ public final class SegmentInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return the id that uniquely identifies this segment. */
|
/** Return the id that uniquely identifies this segment. */
|
||||||
public String getId() {
|
public byte[] getId() {
|
||||||
return id;
|
return id == null ? null : id.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
private Set<String> setFiles;
|
private Set<String> setFiles;
|
||||||
|
|
|
@ -125,8 +125,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
/** The file format version for the segments_N codec header, since 4.9+ */
|
/** The file format version for the segments_N codec header, since 4.9+ */
|
||||||
public static final int VERSION_49 = 3;
|
public static final int VERSION_49 = 3;
|
||||||
|
|
||||||
/** The file format version for the segments_N codec header, since 4.11+ */
|
/** The file format version for the segments_N codec header, since 5.0+ */
|
||||||
public static final int VERSION_411 = 4;
|
public static final int VERSION_50 = 4;
|
||||||
|
|
||||||
/** Used to name new segments. */
|
/** Used to name new segments. */
|
||||||
// TODO: should this be a long ...?
|
// TODO: should this be a long ...?
|
||||||
|
@ -151,8 +151,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
*/
|
*/
|
||||||
private static PrintStream infoStream = null;
|
private static PrintStream infoStream = null;
|
||||||
|
|
||||||
/** Id for this commit; only written starting with Lucene 4.11 */
|
/** Id for this commit; only written starting with Lucene 5.0 */
|
||||||
private String id;
|
private byte[] id;
|
||||||
|
|
||||||
/** Sole constructor. Typically you call this and then
|
/** Sole constructor. Typically you call this and then
|
||||||
* use {@link #read(Directory) or
|
* use {@link #read(Directory) or
|
||||||
|
@ -262,10 +262,10 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
nextGeneration);
|
nextGeneration);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Since Lucene 4.11, every commit (segments_N) writes a unique id. This will
|
/** Since Lucene 5.0, every commit (segments_N) writes a unique id. This will
|
||||||
* return that id, or null if this commit was pre-4.11. */
|
* return that id, or null if this commit was pre-5.0. */
|
||||||
public String getId() {
|
public byte[] getId() {
|
||||||
return id;
|
return id == null ? null : id.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -296,7 +296,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
|
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
|
||||||
}
|
}
|
||||||
// 4.0+
|
// 4.0+
|
||||||
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_411);
|
int format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_50);
|
||||||
version = input.readLong();
|
version = input.readLong();
|
||||||
counter = input.readInt();
|
counter = input.readInt();
|
||||||
int numSegments = input.readInt();
|
int numSegments = input.readInt();
|
||||||
|
@ -361,8 +361,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
add(siPerCommit);
|
add(siPerCommit);
|
||||||
}
|
}
|
||||||
userData = input.readStringStringMap();
|
userData = input.readStringStringMap();
|
||||||
if (format >= VERSION_411) {
|
if (format >= VERSION_50) {
|
||||||
id = input.readString();
|
id = new byte[StringHelper.ID_LENGTH];
|
||||||
|
input.readBytes(id, 0, id.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (format >= VERSION_48) {
|
if (format >= VERSION_48) {
|
||||||
|
@ -425,7 +426,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
|
|
||||||
try {
|
try {
|
||||||
segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
|
segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT);
|
||||||
CodecUtil.writeHeader(segnOutput, "segments", VERSION_411);
|
CodecUtil.writeHeader(segnOutput, "segments", VERSION_50);
|
||||||
segnOutput.writeLong(version);
|
segnOutput.writeLong(version);
|
||||||
segnOutput.writeInt(counter); // write counter
|
segnOutput.writeInt(counter); // write counter
|
||||||
segnOutput.writeInt(size()); // write infos
|
segnOutput.writeInt(size()); // write infos
|
||||||
|
@ -451,7 +452,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
assert si.dir == directory;
|
assert si.dir == directory;
|
||||||
}
|
}
|
||||||
segnOutput.writeStringStringMap(userData);
|
segnOutput.writeStringStringMap(userData);
|
||||||
segnOutput.writeString(StringHelper.randomId());
|
byte[] id = StringHelper.randomId();
|
||||||
|
segnOutput.writeBytes(id, 0, id.length);
|
||||||
CodecUtil.writeFooter(segnOutput);
|
CodecUtil.writeFooter(segnOutput);
|
||||||
segnOutput.close();
|
segnOutput.close();
|
||||||
directory.sync(Collections.singleton(segmentFileName));
|
directory.sync(Collections.singleton(segmentFileName));
|
||||||
|
|
|
@ -232,21 +232,21 @@ public abstract class StringHelper {
|
||||||
|
|
||||||
// Holds 128 bit unsigned value:
|
// Holds 128 bit unsigned value:
|
||||||
private static BigInteger nextId;
|
private static BigInteger nextId;
|
||||||
private static final BigInteger idMask;
|
private static final BigInteger mask128;
|
||||||
private static final Object idLock = new Object();
|
private static final Object idLock = new Object();
|
||||||
private static final String idPad = "00000000000000000000000000000000";
|
|
||||||
|
|
||||||
static {
|
static {
|
||||||
byte[] maskBytes = new byte[16];
|
// 128 bit unsigned mask
|
||||||
Arrays.fill(maskBytes, (byte) 0xff);
|
byte[] maskBytes128 = new byte[16];
|
||||||
idMask = new BigInteger(maskBytes);
|
Arrays.fill(maskBytes128, (byte) 0xff);
|
||||||
|
mask128 = new BigInteger(1, maskBytes128);
|
||||||
|
|
||||||
String prop = System.getProperty("tests.seed");
|
String prop = System.getProperty("tests.seed");
|
||||||
|
|
||||||
// State for xorshift128:
|
// State for xorshift128:
|
||||||
long x0;
|
long x0;
|
||||||
long x1;
|
long x1;
|
||||||
|
|
||||||
long seed;
|
|
||||||
if (prop != null) {
|
if (prop != null) {
|
||||||
// So if there is a test failure that somehow relied on this id,
|
// So if there is a test failure that somehow relied on this id,
|
||||||
// we remain reproducible based on the test seed:
|
// we remain reproducible based on the test seed:
|
||||||
|
@ -280,17 +280,25 @@ public abstract class StringHelper {
|
||||||
s1 ^= s1 << 23; // a
|
s1 ^= s1 << 23; // a
|
||||||
x1 = s1 ^ s0 ^ (s1 >>> 17) ^ (s0 >>> 26); // b, c
|
x1 = s1 ^ s0 ^ (s1 >>> 17) ^ (s0 >>> 26); // b, c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 64-bit unsigned mask
|
||||||
|
byte[] maskBytes64 = new byte[8];
|
||||||
|
Arrays.fill(maskBytes64, (byte) 0xff);
|
||||||
|
BigInteger mask64 = new BigInteger(1, maskBytes64);
|
||||||
|
|
||||||
// First make unsigned versions of x0, x1:
|
// First make unsigned versions of x0, x1:
|
||||||
BigInteger unsignedX0 = new BigInteger(1, BigInteger.valueOf(x0).toByteArray());
|
BigInteger unsignedX0 = BigInteger.valueOf(x0).and(mask64);
|
||||||
BigInteger unsignedX1 = new BigInteger(1, BigInteger.valueOf(x1).toByteArray());
|
BigInteger unsignedX1 = BigInteger.valueOf(x1).and(mask64);
|
||||||
|
|
||||||
// Concatenate bits of x0 and x1, as unsigned 128 bit integer:
|
// Concatenate bits of x0 and x1, as unsigned 128 bit integer:
|
||||||
nextId = unsignedX0.shiftLeft(64).or(unsignedX1);
|
nextId = unsignedX0.shiftLeft(64).or(unsignedX1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** length in bytes of an ID */
|
||||||
|
public static final int ID_LENGTH = 16;
|
||||||
|
|
||||||
/** Generates a non-cryptographic globally unique id. */
|
/** Generates a non-cryptographic globally unique id. */
|
||||||
public static String randomId() {
|
public static byte[] randomId() {
|
||||||
|
|
||||||
// NOTE: we don't use Java's UUID.randomUUID() implementation here because:
|
// NOTE: we don't use Java's UUID.randomUUID() implementation here because:
|
||||||
//
|
//
|
||||||
|
@ -306,15 +314,42 @@ public abstract class StringHelper {
|
||||||
// what impact that has on the period, whereas the simple ++ (mod 2^128)
|
// what impact that has on the period, whereas the simple ++ (mod 2^128)
|
||||||
// we use here is guaranteed to have the full period.
|
// we use here is guaranteed to have the full period.
|
||||||
|
|
||||||
String id;
|
byte bits[];
|
||||||
synchronized(idLock) {
|
synchronized(idLock) {
|
||||||
id = nextId.toString(16);
|
bits = nextId.toByteArray();
|
||||||
nextId = nextId.add(BigInteger.ONE).and(idMask);
|
nextId = nextId.add(BigInteger.ONE).and(mask128);
|
||||||
|
}
|
||||||
|
|
||||||
|
// toByteArray() always returns a sign bit, so it may require an extra byte (always zero)
|
||||||
|
if (bits.length > ID_LENGTH) {
|
||||||
|
assert bits.length == ID_LENGTH + 1;
|
||||||
|
assert bits[0] == 0;
|
||||||
|
return Arrays.copyOfRange(bits, 1, bits.length);
|
||||||
|
} else {
|
||||||
|
byte[] result = new byte[ID_LENGTH];
|
||||||
|
System.arraycopy(bits, 0, result, result.length - bits.length, bits.length);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method to render an ID as a string, for debugging
|
||||||
|
* <p>
|
||||||
|
* Returns the string {@code (null)} if the id is null.
|
||||||
|
* Otherwise, returns a string representation for debugging.
|
||||||
|
* Never throws an exception. The returned string may
|
||||||
|
* indicate if the id is definitely invalid.
|
||||||
|
*/
|
||||||
|
public static String idToString(byte id[]) {
|
||||||
|
if (id == null) {
|
||||||
|
return "(null)";
|
||||||
|
} else {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append(new BigInteger(1, id).toString(Character.MAX_RADIX));
|
||||||
|
if (id.length != ID_LENGTH) {
|
||||||
|
sb.append(" (INVALID FORMAT)");
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
assert id.length() <= 32: "id=" + id;
|
|
||||||
id = idPad.substring(id.length()) + id;
|
|
||||||
|
|
||||||
return id;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2767,11 +2767,13 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
|
|
||||||
SegmentInfos sis = new SegmentInfos();
|
SegmentInfos sis = new SegmentInfos();
|
||||||
sis.read(d);
|
sis.read(d);
|
||||||
String id1 = sis.getId();
|
byte[] id1 = sis.getId();
|
||||||
assertNotNull(id1);
|
assertNotNull(id1);
|
||||||
|
assertEquals(StringHelper.ID_LENGTH, id1.length);
|
||||||
|
|
||||||
String id2 = sis.info(0).info.getId();
|
byte[] id2 = sis.info(0).info.getId();
|
||||||
assertNotNull(id2);
|
assertNotNull(id2);
|
||||||
|
assertEquals(StringHelper.ID_LENGTH, id2.length);
|
||||||
|
|
||||||
// Make sure CheckIndex includes id output:
|
// Make sure CheckIndex includes id output:
|
||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
|
||||||
|
@ -2784,14 +2786,14 @@ public class TestIndexWriter extends LuceneTestCase {
|
||||||
assertTrue(s, indexStatus != null && indexStatus.clean);
|
assertTrue(s, indexStatus != null && indexStatus.clean);
|
||||||
|
|
||||||
// Commit id is always stored:
|
// Commit id is always stored:
|
||||||
assertTrue("missing id=" + id1 + " in:\n" + s, s.contains("id=" + id1));
|
assertTrue("missing id=" + StringHelper.idToString(id1) + " in:\n" + s, s.contains("id=" + StringHelper.idToString(id1)));
|
||||||
|
|
||||||
assertTrue("missing id=" + id2 + " in:\n" + s, s.contains("id=" + id2));
|
// Segment id must also appear in the CheckIndex output (check id2, not the commit id again):
assertTrue("missing id=" + StringHelper.idToString(id2) + " in:\n" + s, s.contains("id=" + StringHelper.idToString(id2)));
|
||||||
d.close();
|
d.close();
|
||||||
|
|
||||||
Set<String> ids = new HashSet<>();
|
Set<String> ids = new HashSet<>();
|
||||||
for(int i=0;i<100000;i++) {
|
for(int i=0;i<100000;i++) {
|
||||||
String id = StringHelper.randomId();
|
String id = StringHelper.idToString(StringHelper.randomId());
|
||||||
assertFalse("id=" + id + " i=" + i, ids.contains(id));
|
assertFalse("id=" + id + " i=" + i, ids.contains(id));
|
||||||
ids.add(id);
|
ids.add(id);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue