mirror of https://github.com/apache/lucene.git

LUCENE-9627: Remove unused Lucene50FieldInfosFormat codec and slightly refactor some codecs to separate reading the header/footer from reading the content of the file

parent 94c69f4385
commit 4b3e8d7ce8
@@ -220,6 +220,9 @@ Other

* LUCENE-9544: add regenerate gradle script for nori dictionary (Namgyu Kim)

* LUCENE-9627: Remove unused Lucene50FieldInfosFormat codec and small refactor some codecs
  to separate reading header/footer from reading content of the file. (Ignacio Vera)

======================= Lucene 8.8.0 =======================

New Features
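The refactor applies the same pattern to every codec it touches: the public read method keeps only the index-header check and the footer/checksum handling, and the format-specific decoding moves into a private helper. A minimal sketch of that pattern is below; it is not the exact Lucene code, and the names MyContent and parseContent are illustrative only (the CodecUtil and Directory calls are the ones used throughout this diff).

public MyContent read(Directory dir, String fileName, byte[] segmentID, IOContext context) throws IOException {
  try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
    MyContent content = null;
    Throwable priorE = null;
    try {
      // header handling stays in the caller
      CodecUtil.checkIndexHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT, segmentID, "");
      // all format-specific decoding now lives in one private method
      content = parseContent(input);
    } catch (Throwable exception) {
      priorE = exception;
    } finally {
      // footer/checksum handling also stays in the caller
      CodecUtil.checkFooter(input, priorE);
    }
    return content;
  }
}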
@@ -34,6 +34,7 @@ import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -80,6 +81,12 @@ import org.apache.lucene.util.Version;
 */
public class Lucene70SegmentInfoFormat extends SegmentInfoFormat {

/** File extension used to store {@link SegmentInfo}. */
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene70SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;

/** Sole constructor. */
public Lucene70SegmentInfoFormat() {
}
@@ -95,170 +102,9 @@ public class Lucene70SegmentInfoFormat extends SegmentInfoFormat {
Lucene70SegmentInfoFormat.VERSION_START,
Lucene70SegmentInfoFormat.VERSION_CURRENT,
segmentID, "");
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
byte hasMinVersion = input.readByte();
final Version minVersion;
switch (hasMinVersion) {
case 0:
minVersion = null;
break;
case 1:
minVersion = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
break;
default:
throw new CorruptIndexException("Illegal boolean value " + hasMinVersion, input);
}

final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);
}
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;

final Map<String,String> diagnostics = input.readMapOfStrings();
final Set<String> files = input.readSetOfStrings();
final Map<String,String> attributes = input.readMapOfStrings();

int numSortFields = input.readVInt();
Sort indexSort;
if (numSortFields > 0) {
SortField[] sortFields = new SortField[numSortFields];
for(int i=0;i<numSortFields;i++) {
String fieldName = input.readString();
int sortTypeID = input.readVInt();
SortField.Type sortType;
SortedSetSelector.Type sortedSetSelector = null;
SortedNumericSelector.Type sortedNumericSelector = null;
switch(sortTypeID) {
case 0:
sortType = SortField.Type.STRING;
break;
case 1:
sortType = SortField.Type.LONG;
break;
case 2:
sortType = SortField.Type.INT;
break;
case 3:
sortType = SortField.Type.DOUBLE;
break;
case 4:
sortType = SortField.Type.FLOAT;
break;
case 5:
sortType = SortField.Type.STRING;
byte selector = input.readByte();
if (selector == 0) {
sortedSetSelector = SortedSetSelector.Type.MIN;
} else if (selector == 1) {
sortedSetSelector = SortedSetSelector.Type.MAX;
} else if (selector == 2) {
sortedSetSelector = SortedSetSelector.Type.MIDDLE_MIN;
} else if (selector == 3) {
sortedSetSelector = SortedSetSelector.Type.MIDDLE_MAX;
} else {
throw new CorruptIndexException("invalid index SortedSetSelector ID: " + selector, input);
}
break;
case 6:
byte type = input.readByte();
if (type == 0) {
sortType = SortField.Type.LONG;
} else if (type == 1) {
sortType = SortField.Type.INT;
} else if (type == 2) {
sortType = SortField.Type.DOUBLE;
} else if (type == 3) {
sortType = SortField.Type.FLOAT;
} else {
throw new CorruptIndexException("invalid index SortedNumericSortField type ID: " + type, input);
}
byte numericSelector = input.readByte();
if (numericSelector == 0) {
sortedNumericSelector = SortedNumericSelector.Type.MIN;
} else if (numericSelector == 1) {
sortedNumericSelector = SortedNumericSelector.Type.MAX;
} else {
throw new CorruptIndexException("invalid index SortedNumericSelector ID: " + numericSelector, input);
}
break;
default:
throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
}
byte b = input.readByte();
boolean reverse;
if (b == 0) {
reverse = true;
} else if (b == 1) {
reverse = false;
} else {
throw new CorruptIndexException("invalid index sort reverse: " + b, input);
}

if (sortedSetSelector != null) {
sortFields[i] = new SortedSetSortField(fieldName, reverse, sortedSetSelector);
} else if (sortedNumericSelector != null) {
sortFields[i] = new SortedNumericSortField(fieldName, sortType, reverse, sortedNumericSelector);
} else {
sortFields[i] = new SortField(fieldName, sortType, reverse);
}

Object missingValue;
b = input.readByte();
if (b == 0) {
missingValue = null;
} else {
switch(sortType) {
case STRING:
if (b == 1) {
missingValue = SortField.STRING_LAST;
} else if (b == 2) {
missingValue = SortField.STRING_FIRST;
} else {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
break;
case LONG:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = input.readLong();
break;
case INT:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = input.readInt();
break;
case DOUBLE:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = Double.longBitsToDouble(input.readLong());
break;
case FLOAT:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = Float.intBitsToFloat(input.readInt());
break;
default:
throw new AssertionError("unhandled sortType=" + sortType);
}
}
if (missingValue != null) {
sortFields[i].setMissingValue(missingValue);
}
}
indexSort = new Sort(sortFields);
} else if (numSortFields < 0) {
throw new CorruptIndexException("invalid index sort field count: " + numSortFields, input);
} else {
indexSort = null;
}

si = new SegmentInfo(dir, version, minVersion, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
si.setFiles(files);
si = parseSegmentInfo(dir, input, segment, segmentID);

} catch (Throwable exception) {
priorE = exception;
} finally {
@@ -268,14 +114,176 @@ public class Lucene70SegmentInfoFormat extends SegmentInfoFormat {
}
}

private SegmentInfo parseSegmentInfo(Directory dir, DataInput input, String segment, byte[] segmentID) throws IOException {
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
byte hasMinVersion = input.readByte();
final Version minVersion;
switch (hasMinVersion) {
case 0:
minVersion = null;
break;
case 1:
minVersion = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
break;
default:
throw new CorruptIndexException("Illegal boolean value " + hasMinVersion, input);
}

final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);
}
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;

final Map<String,String> diagnostics = input.readMapOfStrings();
final Set<String> files = input.readSetOfStrings();
final Map<String,String> attributes = input.readMapOfStrings();

int numSortFields = input.readVInt();
Sort indexSort;
if (numSortFields > 0) {
SortField[] sortFields = new SortField[numSortFields];
for(int i=0;i<numSortFields;i++) {
String fieldName = input.readString();
int sortTypeID = input.readVInt();
SortField.Type sortType;
SortedSetSelector.Type sortedSetSelector = null;
SortedNumericSelector.Type sortedNumericSelector = null;
switch(sortTypeID) {
case 0:
sortType = SortField.Type.STRING;
break;
case 1:
sortType = SortField.Type.LONG;
break;
case 2:
sortType = SortField.Type.INT;
break;
case 3:
sortType = SortField.Type.DOUBLE;
break;
case 4:
sortType = SortField.Type.FLOAT;
break;
case 5:
sortType = SortField.Type.STRING;
byte selector = input.readByte();
if (selector == 0) {
sortedSetSelector = SortedSetSelector.Type.MIN;
} else if (selector == 1) {
sortedSetSelector = SortedSetSelector.Type.MAX;
} else if (selector == 2) {
sortedSetSelector = SortedSetSelector.Type.MIDDLE_MIN;
} else if (selector == 3) {
sortedSetSelector = SortedSetSelector.Type.MIDDLE_MAX;
} else {
throw new CorruptIndexException("invalid index SortedSetSelector ID: " + selector, input);
}
break;
case 6:
byte type = input.readByte();
if (type == 0) {
sortType = SortField.Type.LONG;
} else if (type == 1) {
sortType = SortField.Type.INT;
} else if (type == 2) {
sortType = SortField.Type.DOUBLE;
} else if (type == 3) {
sortType = SortField.Type.FLOAT;
} else {
throw new CorruptIndexException("invalid index SortedNumericSortField type ID: " + type, input);
}
byte numericSelector = input.readByte();
if (numericSelector == 0) {
sortedNumericSelector = SortedNumericSelector.Type.MIN;
} else if (numericSelector == 1) {
sortedNumericSelector = SortedNumericSelector.Type.MAX;
} else {
throw new CorruptIndexException("invalid index SortedNumericSelector ID: " + numericSelector, input);
}
break;
default:
throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
}
byte b = input.readByte();
boolean reverse;
if (b == 0) {
reverse = true;
} else if (b == 1) {
reverse = false;
} else {
throw new CorruptIndexException("invalid index sort reverse: " + b, input);
}

if (sortedSetSelector != null) {
sortFields[i] = new SortedSetSortField(fieldName, reverse, sortedSetSelector);
} else if (sortedNumericSelector != null) {
sortFields[i] = new SortedNumericSortField(fieldName, sortType, reverse, sortedNumericSelector);
} else {
sortFields[i] = new SortField(fieldName, sortType, reverse);
}

Object missingValue;
b = input.readByte();
if (b == 0) {
missingValue = null;
} else {
switch(sortType) {
case STRING:
if (b == 1) {
missingValue = SortField.STRING_LAST;
} else if (b == 2) {
missingValue = SortField.STRING_FIRST;
} else {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
break;
case LONG:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = input.readLong();
break;
case INT:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = input.readInt();
break;
case DOUBLE:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = Double.longBitsToDouble(input.readLong());
break;
case FLOAT:
if (b != 1) {
throw new CorruptIndexException("invalid missing value flag: " + b, input);
}
missingValue = Float.intBitsToFloat(input.readInt());
break;
default:
throw new AssertionError("unhandled sortType=" + sortType);
}
}
if (missingValue != null) {
sortFields[i].setMissingValue(missingValue);
}
}
indexSort = new Sort(sortFields);
} else if (numSortFields < 0) {
throw new CorruptIndexException("invalid index sort field count: " + numSortFields, input);
} else {
indexSort = null;
}

SegmentInfo si = new SegmentInfo(dir, version, minVersion, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
si.setFiles(files);
return si;
}

@Override
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
throw new UnsupportedOperationException("Old formats can't be used for writing");
}

/** File extension used to store {@link SegmentInfo}. */
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene70SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
}
@@ -47,158 +47,162 @@ public class Lucene70RWSegmentInfoFormat extends Lucene70SegmentInfoFormat {
|
|||
// Only add the file once we've successfully created it, else IFD assert can trip:
|
||||
si.addFile(fileName);
|
||||
CodecUtil.writeIndexHeader(output,
|
||||
Lucene70SegmentInfoFormat.CODEC_NAME,
|
||||
Lucene70SegmentInfoFormat.VERSION_CURRENT,
|
||||
si.getId(),
|
||||
"");
|
||||
Version version = si.getVersion();
|
||||
if (version.major < 7) {
|
||||
throw new IllegalArgumentException("invalid major version: should be >= 7 but got: " + version.major + " segment=" + si);
|
||||
}
|
||||
// Write the Lucene version that created this segment, since 3.1
|
||||
output.writeInt(version.major);
|
||||
output.writeInt(version.minor);
|
||||
output.writeInt(version.bugfix);
|
||||
|
||||
// Write the min Lucene version that contributed docs to the segment, since 7.0
|
||||
if (si.getMinVersion() != null) {
|
||||
output.writeByte((byte) 1);
|
||||
Version minVersion = si.getMinVersion();
|
||||
output.writeInt(minVersion.major);
|
||||
output.writeInt(minVersion.minor);
|
||||
output.writeInt(minVersion.bugfix);
|
||||
} else {
|
||||
output.writeByte((byte) 0);
|
||||
}
|
||||
|
||||
assert version.prerelease == 0;
|
||||
output.writeInt(si.maxDoc());
|
||||
|
||||
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
||||
output.writeMapOfStrings(si.getDiagnostics());
|
||||
Set<String> files = si.files();
|
||||
for (String file : files) {
|
||||
if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
|
||||
throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
|
||||
}
|
||||
}
|
||||
output.writeSetOfStrings(files);
|
||||
output.writeMapOfStrings(si.getAttributes());
|
||||
|
||||
Sort indexSort = si.getIndexSort();
|
||||
int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
|
||||
output.writeVInt(numSortFields);
|
||||
for (int i = 0; i < numSortFields; ++i) {
|
||||
SortField sortField = indexSort.getSort()[i];
|
||||
SortField.Type sortType = sortField.getType();
|
||||
output.writeString(sortField.getField());
|
||||
int sortTypeID;
|
||||
switch (sortField.getType()) {
|
||||
case STRING:
|
||||
sortTypeID = 0;
|
||||
break;
|
||||
case LONG:
|
||||
sortTypeID = 1;
|
||||
break;
|
||||
case INT:
|
||||
sortTypeID = 2;
|
||||
break;
|
||||
case DOUBLE:
|
||||
sortTypeID = 3;
|
||||
break;
|
||||
case FLOAT:
|
||||
sortTypeID = 4;
|
||||
break;
|
||||
case CUSTOM:
|
||||
if (sortField instanceof SortedSetSortField) {
|
||||
sortTypeID = 5;
|
||||
sortType = SortField.Type.STRING;
|
||||
} else if (sortField instanceof SortedNumericSortField) {
|
||||
sortTypeID = 6;
|
||||
sortType = ((SortedNumericSortField) sortField).getNumericType();
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedNumericSortField " + sortField);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
|
||||
}
|
||||
output.writeVInt(sortTypeID);
|
||||
if (sortTypeID == 5) {
|
||||
SortedSetSortField ssf = (SortedSetSortField) sortField;
|
||||
if (ssf.getSelector() == SortedSetSelector.Type.MIN) {
|
||||
output.writeByte((byte) 0);
|
||||
} else if (ssf.getSelector() == SortedSetSelector.Type.MAX) {
|
||||
output.writeByte((byte) 1);
|
||||
} else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MIN) {
|
||||
output.writeByte((byte) 2);
|
||||
} else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MAX) {
|
||||
output.writeByte((byte) 3);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedSetSelector type: " + ssf.getSelector());
|
||||
}
|
||||
} else if (sortTypeID == 6) {
|
||||
SortedNumericSortField snsf = (SortedNumericSortField) sortField;
|
||||
if (snsf.getNumericType() == SortField.Type.LONG) {
|
||||
output.writeByte((byte) 0);
|
||||
} else if (snsf.getNumericType() == SortField.Type.INT) {
|
||||
output.writeByte((byte) 1);
|
||||
} else if (snsf.getNumericType() == SortField.Type.DOUBLE) {
|
||||
output.writeByte((byte) 2);
|
||||
} else if (snsf.getNumericType() == SortField.Type.FLOAT) {
|
||||
output.writeByte((byte) 3);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedNumericSelector type: " + snsf.getNumericType());
|
||||
}
|
||||
if (snsf.getSelector() == SortedNumericSelector.Type.MIN) {
|
||||
output.writeByte((byte) 0);
|
||||
} else if (snsf.getSelector() == SortedNumericSelector.Type.MAX) {
|
||||
output.writeByte((byte) 1);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected sorted numeric selector type: " + snsf.getSelector());
|
||||
}
|
||||
}
|
||||
output.writeByte((byte) (sortField.getReverse() ? 0 : 1));
|
||||
|
||||
// write missing value
|
||||
Object missingValue = sortField.getMissingValue();
|
||||
if (missingValue == null) {
|
||||
output.writeByte((byte) 0);
|
||||
} else {
|
||||
switch(sortType) {
|
||||
case STRING:
|
||||
if (missingValue == SortField.STRING_LAST) {
|
||||
output.writeByte((byte) 1);
|
||||
} else if (missingValue == SortField.STRING_FIRST) {
|
||||
output.writeByte((byte) 2);
|
||||
} else {
|
||||
throw new AssertionError("unrecognized missing value for STRING field \"" + sortField.getField() + "\": " + missingValue);
|
||||
}
|
||||
break;
|
||||
case LONG:
|
||||
output.writeByte((byte) 1);
|
||||
output.writeLong(((Long) missingValue).longValue());
|
||||
break;
|
||||
case INT:
|
||||
output.writeByte((byte) 1);
|
||||
output.writeInt(((Integer) missingValue).intValue());
|
||||
break;
|
||||
case DOUBLE:
|
||||
output.writeByte((byte) 1);
|
||||
output.writeLong(Double.doubleToLongBits(((Double) missingValue).doubleValue()));
|
||||
break;
|
||||
case FLOAT:
|
||||
output.writeByte((byte) 1);
|
||||
output.writeInt(Float.floatToIntBits(((Float) missingValue).floatValue()));
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
|
||||
}
|
||||
}
|
||||
}
|
||||
Lucene70SegmentInfoFormat.CODEC_NAME,
|
||||
Lucene70SegmentInfoFormat.VERSION_CURRENT,
|
||||
si.getId(),
|
||||
"");
|
||||
|
||||
writeSegmentInfo(output, si);
|
||||
|
||||
CodecUtil.writeFooter(output);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeSegmentInfo(IndexOutput output, SegmentInfo si) throws IOException {
|
||||
Version version = si.getVersion();
|
||||
if (version.major < 7) {
|
||||
throw new IllegalArgumentException("invalid major version: should be >= 7 but got: " + version.major + " segment=" + si);
|
||||
}
|
||||
// Write the Lucene version that created this segment, since 3.1
|
||||
output.writeInt(version.major);
|
||||
output.writeInt(version.minor);
|
||||
output.writeInt(version.bugfix);
|
||||
|
||||
// Write the min Lucene version that contributed docs to the segment, since 7.0
|
||||
if (si.getMinVersion() != null) {
|
||||
output.writeByte((byte) 1);
|
||||
Version minVersion = si.getMinVersion();
|
||||
output.writeInt(minVersion.major);
|
||||
output.writeInt(minVersion.minor);
|
||||
output.writeInt(minVersion.bugfix);
|
||||
} else {
|
||||
output.writeByte((byte) 0);
|
||||
}
|
||||
|
||||
assert version.prerelease == 0;
|
||||
output.writeInt(si.maxDoc());
|
||||
|
||||
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
||||
output.writeMapOfStrings(si.getDiagnostics());
|
||||
Set<String> files = si.files();
|
||||
for (String file : files) {
|
||||
if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
|
||||
throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
|
||||
}
|
||||
}
|
||||
output.writeSetOfStrings(files);
|
||||
output.writeMapOfStrings(si.getAttributes());
|
||||
|
||||
Sort indexSort = si.getIndexSort();
|
||||
int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
|
||||
output.writeVInt(numSortFields);
|
||||
for (int i = 0; i < numSortFields; ++i) {
|
||||
SortField sortField = indexSort.getSort()[i];
|
||||
SortField.Type sortType = sortField.getType();
|
||||
output.writeString(sortField.getField());
|
||||
int sortTypeID;
|
||||
switch (sortField.getType()) {
|
||||
case STRING:
|
||||
sortTypeID = 0;
|
||||
break;
|
||||
case LONG:
|
||||
sortTypeID = 1;
|
||||
break;
|
||||
case INT:
|
||||
sortTypeID = 2;
|
||||
break;
|
||||
case DOUBLE:
|
||||
sortTypeID = 3;
|
||||
break;
|
||||
case FLOAT:
|
||||
sortTypeID = 4;
|
||||
break;
|
||||
case CUSTOM:
|
||||
if (sortField instanceof SortedSetSortField) {
|
||||
sortTypeID = 5;
|
||||
sortType = SortField.Type.STRING;
|
||||
} else if (sortField instanceof SortedNumericSortField) {
|
||||
sortTypeID = 6;
|
||||
sortType = ((SortedNumericSortField) sortField).getNumericType();
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedNumericSortField " + sortField);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
|
||||
}
|
||||
output.writeVInt(sortTypeID);
|
||||
if (sortTypeID == 5) {
|
||||
SortedSetSortField ssf = (SortedSetSortField) sortField;
|
||||
if (ssf.getSelector() == SortedSetSelector.Type.MIN) {
|
||||
output.writeByte((byte) 0);
|
||||
} else if (ssf.getSelector() == SortedSetSelector.Type.MAX) {
|
||||
output.writeByte((byte) 1);
|
||||
} else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MIN) {
|
||||
output.writeByte((byte) 2);
|
||||
} else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MAX) {
|
||||
output.writeByte((byte) 3);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedSetSelector type: " + ssf.getSelector());
|
||||
}
|
||||
} else if (sortTypeID == 6) {
|
||||
SortedNumericSortField snsf = (SortedNumericSortField) sortField;
|
||||
if (snsf.getNumericType() == SortField.Type.LONG) {
|
||||
output.writeByte((byte) 0);
|
||||
} else if (snsf.getNumericType() == SortField.Type.INT) {
|
||||
output.writeByte((byte) 1);
|
||||
} else if (snsf.getNumericType() == SortField.Type.DOUBLE) {
|
||||
output.writeByte((byte) 2);
|
||||
} else if (snsf.getNumericType() == SortField.Type.FLOAT) {
|
||||
output.writeByte((byte) 3);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedNumericSelector type: " + snsf.getNumericType());
|
||||
}
|
||||
if (snsf.getSelector() == SortedNumericSelector.Type.MIN) {
|
||||
output.writeByte((byte) 0);
|
||||
} else if (snsf.getSelector() == SortedNumericSelector.Type.MAX) {
|
||||
output.writeByte((byte) 1);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected sorted numeric selector type: " + snsf.getSelector());
|
||||
}
|
||||
}
|
||||
output.writeByte((byte) (sortField.getReverse() ? 0 : 1));
|
||||
|
||||
// write missing value
|
||||
Object missingValue = sortField.getMissingValue();
|
||||
if (missingValue == null) {
|
||||
output.writeByte((byte) 0);
|
||||
} else {
|
||||
switch(sortType) {
|
||||
case STRING:
|
||||
if (missingValue == SortField.STRING_LAST) {
|
||||
output.writeByte((byte) 1);
|
||||
} else if (missingValue == SortField.STRING_FIRST) {
|
||||
output.writeByte((byte) 2);
|
||||
} else {
|
||||
throw new AssertionError("unrecognized missing value for STRING field \"" + sortField.getField() + "\": " + missingValue);
|
||||
}
|
||||
break;
|
||||
case LONG:
|
||||
output.writeByte((byte) 1);
|
||||
output.writeLong(((Long) missingValue).longValue());
|
||||
break;
|
||||
case INT:
|
||||
output.writeByte((byte) 1);
|
||||
output.writeInt(((Integer) missingValue).intValue());
|
||||
break;
|
||||
case DOUBLE:
|
||||
output.writeByte((byte) 1);
|
||||
output.writeLong(Double.doubleToLongBits(((Double) missingValue).doubleValue()));
|
||||
break;
|
||||
case FLOAT:
|
||||
output.writeByte((byte) 1);
|
||||
output.writeInt(Float.floatToIntBits(((Float) missingValue).floatValue()));
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -62,6 +62,15 @@ import org.apache.lucene.store.IndexOutput;
 */
public final class Lucene50CompoundFormat extends CompoundFormat {

/** Extension of compound file */
static final String DATA_EXTENSION = "cfs";
/** Extension of compound file entries */
static final String ENTRIES_EXTENSION = "cfe";
static final String DATA_CODEC = "Lucene50CompoundData";
static final String ENTRY_CODEC = "Lucene50CompoundEntries";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;

/** Sole constructor. */
public Lucene50CompoundFormat() {
}
@@ -80,52 +89,46 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
IndexOutput entries = dir.createOutput(entriesFile, context)) {
CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), "");
CodecUtil.writeIndexHeader(entries, ENTRY_CODEC, VERSION_CURRENT, si.getId(), "");

// write number of files
entries.writeVInt(si.files().size());
for (String file : si.files()) {

// write bytes for file
long startOffset = data.getFilePointer();
try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) {

// just copies the index header, verifying that its id matches what we expect
CodecUtil.verifyAndCopyIndexHeader(in, data, si.getId());

// copy all bytes except the footer
long numBytesToCopy = in.length() - CodecUtil.footerLength() - in.getFilePointer();
data.copyBytes(in, numBytesToCopy);

// verify footer (checksum) matches for the incoming file we are copying
long checksum = CodecUtil.checkFooter(in);

// this is poached from CodecUtil.writeFooter, but we need to use our own checksum, not data.getChecksum(), but I think
// adding a public method to CodecUtil to do that is somewhat dangerous:
data.writeInt(CodecUtil.FOOTER_MAGIC);
data.writeInt(0);
data.writeLong(checksum);
}
long endOffset = data.getFilePointer();

long length = endOffset - startOffset;

// write entry for file
entries.writeString(IndexFileNames.stripSegmentName(file));
entries.writeLong(startOffset);
entries.writeLong(length);
}
writeCompoundFile(entries, data, dir, si);

CodecUtil.writeFooter(data);
CodecUtil.writeFooter(entries);
}
}

private void writeCompoundFile(IndexOutput entries, IndexOutput data, Directory dir, SegmentInfo si) throws IOException {
// write number of files
entries.writeVInt(si.files().size());
for (String file : si.files()) {
// write bytes for file
long startOffset = data.getFilePointer();
try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) {
// just copies the index header, verifying that its id matches what we expect
CodecUtil.verifyAndCopyIndexHeader(in, data, si.getId());

// copy all bytes except the footer
long numBytesToCopy = in.length() - CodecUtil.footerLength() - in.getFilePointer();
data.copyBytes(in, numBytesToCopy);

// verify footer (checksum) matches for the incoming file we are copying
long checksum = CodecUtil.checkFooter(in);

// this is poached from CodecUtil.writeFooter, but we need to use our own checksum, not data.getChecksum(), but I think
// adding a public method to CodecUtil to do that is somewhat dangerous:
data.writeInt(CodecUtil.FOOTER_MAGIC);
data.writeInt(0);
data.writeLong(checksum);
}
long endOffset = data.getFilePointer();

long length = endOffset - startOffset;

// write entry for file
entries.writeString(IndexFileNames.stripSegmentName(file));
entries.writeLong(startOffset);
entries.writeLong(length);
}
}
}
@@ -100,25 +100,16 @@ final class Lucene50CompoundReader extends CompoundDirectory {

/** Helper method that reads CFS entries from an input stream */
private Map<String, FileEntry> readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
Map<String,FileEntry> mapping = null;
Map<String, FileEntry> mapping = null;
try (ChecksumIndexInput entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE)) {
Throwable priorE = null;
try {
version = CodecUtil.checkIndexHeader(entriesStream, Lucene50CompoundFormat.ENTRY_CODEC,
Lucene50CompoundFormat.VERSION_START,
Lucene50CompoundFormat.VERSION_CURRENT, segmentID, "");
final int numEntries = entriesStream.readVInt();
mapping = new HashMap<>(numEntries);
for (int i = 0; i < numEntries; i++) {
final FileEntry fileEntry = new FileEntry();
final String id = entriesStream.readString();
FileEntry previous = mapping.put(id, fileEntry);
if (previous != null) {
throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS ", entriesStream);
}
fileEntry.offset = entriesStream.readLong();
fileEntry.length = entriesStream.readLong();
}
version = CodecUtil.checkIndexHeader(entriesStream, Lucene50CompoundFormat.ENTRY_CODEC,
Lucene50CompoundFormat.VERSION_START,
Lucene50CompoundFormat.VERSION_CURRENT, segmentID, "");

mapping = readMapping(entriesStream);

} catch (Throwable exception) {
priorE = exception;
} finally {
@@ -128,6 +119,22 @@ final class Lucene50CompoundReader extends CompoundDirectory {
return Collections.unmodifiableMap(mapping);
}

private Map<String,FileEntry> readMapping(IndexInput entriesStream) throws IOException {
final int numEntries = entriesStream.readVInt();
Map<String,FileEntry> mapping = new HashMap<>(numEntries);
for (int i = 0; i < numEntries; i++) {
final FileEntry fileEntry = new FileEntry();
final String id = entriesStream.readString();
FileEntry previous = mapping.put(id, fileEntry);
if (previous != null) {
throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS ", entriesStream);
}
fileEntry.offset = entriesStream.readLong();
fileEntry.length = entriesStream.readLong();
}
return mapping;
}

@Override
public void close() throws IOException {
IOUtils.close(handle);
@@ -1,296 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene50;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.VectorValues;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
/**
|
||||
* Lucene 5.0 Field Infos format.
|
||||
* <p>Field names are stored in the field info file, with suffix <code>.fnm</code>.
|
||||
* <p>FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber,
|
||||
* FieldBits,DocValuesBits,DocValuesGen,Attributes> <sup>FieldsCount</sup>,Footer
|
||||
* <p>Data types:
|
||||
* <ul>
|
||||
* <li>Header --> {@link CodecUtil#checkIndexHeader IndexHeader}</li>
|
||||
* <li>FieldsCount --> {@link DataOutput#writeVInt VInt}</li>
|
||||
* <li>FieldName --> {@link DataOutput#writeString String}</li>
|
||||
* <li>FieldBits, IndexOptions, DocValuesBits --> {@link DataOutput#writeByte Byte}</li>
|
||||
* <li>FieldNumber --> {@link DataOutput#writeInt VInt}</li>
|
||||
* <li>Attributes --> {@link DataOutput#writeMapOfStrings Map<String,String>}</li>
|
||||
* <li>DocValuesGen --> {@link DataOutput#writeLong(long) Int64}</li>
|
||||
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
|
||||
* </ul>
|
||||
* Field Descriptions:
|
||||
* <ul>
|
||||
* <li>FieldsCount: the number of fields in this file.</li>
|
||||
* <li>FieldName: name of the field as a UTF-8 String.</li>
|
||||
* <li>FieldNumber: the field's number. Note that unlike previous versions of
|
||||
* Lucene, the fields are not numbered implicitly by their order in the
|
||||
* file, instead explicitly.</li>
|
||||
* <li>FieldBits: a byte containing field options.
|
||||
* <ul>
|
||||
* <li>The low order bit (0x1) is one for fields that have term vectors
|
||||
* stored, and zero for fields without term vectors.</li>
|
||||
* <li>If the second lowest order-bit is set (0x2), norms are omitted for the
|
||||
* indexed field.</li>
|
||||
* <li>If the third lowest-order bit is set (0x4), payloads are stored for the
|
||||
* indexed field.</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* <li>IndexOptions: a byte containing index options.
|
||||
* <ul>
|
||||
* <li>0: not indexed</li>
|
||||
* <li>1: indexed as DOCS_ONLY</li>
|
||||
* <li>2: indexed as DOCS_AND_FREQS</li>
|
||||
* <li>3: indexed as DOCS_AND_FREQS_AND_POSITIONS</li>
|
||||
* <li>4: indexed as DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* <li>DocValuesBits: a byte containing per-document value types. The type
|
||||
* recorded as two four-bit integers, with the high-order bits representing
|
||||
* <code>norms</code> options, and the low-order bits representing
|
||||
* {@code DocValues} options. Each four-bit integer can be decoded as such:
|
||||
* <ul>
|
||||
* <li>0: no DocValues for this field.</li>
|
||||
* <li>1: NumericDocValues. ({@link DocValuesType#NUMERIC})</li>
|
||||
* <li>2: BinaryDocValues. ({@code DocValuesType#BINARY})</li>
|
||||
* <li>3: SortedDocValues. ({@code DocValuesType#SORTED})</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* <li>DocValuesGen is the generation count of the field's DocValues. If this is -1,
|
||||
* there are no DocValues updates to that field. Anything above zero means there
|
||||
* are updates stored by {@link DocValuesFormat}.</li>
|
||||
* <li>Attributes: a key-value map of codec-private attributes.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class Lucene50FieldInfosFormat extends FieldInfosFormat {
|
||||
|
||||
/** Sole constructor. */
|
||||
public Lucene50FieldInfosFormat() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
|
||||
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
|
||||
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
|
||||
Throwable priorE = null;
|
||||
FieldInfo infos[] = null;
|
||||
try {
|
||||
CodecUtil.checkIndexHeader(input, Lucene50FieldInfosFormat.CODEC_NAME,
|
||||
Lucene50FieldInfosFormat.FORMAT_START,
|
||||
Lucene50FieldInfosFormat.FORMAT_CURRENT,
|
||||
segmentInfo.getId(), segmentSuffix);
|
||||
|
||||
final int size = input.readVInt(); //read in the size
|
||||
infos = new FieldInfo[size];
|
||||
|
||||
// previous field's attribute map, we share when possible:
|
||||
Map<String,String> lastAttributes = Collections.emptyMap();
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
String name = input.readString();
|
||||
final int fieldNumber = input.readVInt();
|
||||
if (fieldNumber < 0) {
|
||||
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
|
||||
}
|
||||
byte bits = input.readByte();
|
||||
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
|
||||
boolean omitNorms = (bits & OMIT_NORMS) != 0;
|
||||
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
|
||||
|
||||
final IndexOptions indexOptions = getIndexOptions(input, input.readByte());
|
||||
|
||||
// DV Types are packed in one byte
|
||||
final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
|
||||
final long dvGen = input.readLong();
|
||||
Map<String,String> attributes = input.readMapOfStrings();
|
||||
|
||||
// just use the last field's map if its the same
|
||||
if (attributes.equals(lastAttributes)) {
|
||||
attributes = lastAttributes;
|
||||
}
|
||||
lastAttributes = attributes;
|
||||
try {
|
||||
infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads,
|
||||
indexOptions, docValuesType, dvGen, attributes, 0, 0, 0,
|
||||
0, VectorValues.SearchStrategy.NONE, false);
|
||||
} catch (IllegalStateException e) {
|
||||
throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
|
||||
}
|
||||
}
|
||||
} catch (Throwable exception) {
|
||||
priorE = exception;
|
||||
} finally {
|
||||
CodecUtil.checkFooter(input, priorE);
|
||||
}
|
||||
return new FieldInfos(infos);
|
||||
}
|
||||
}
|
||||
|
||||
static {
|
||||
// We "mirror" DocValues enum values with the constants below; let's try to ensure if we add a new DocValuesType while this format is
|
||||
// still used for writing, we remember to fix this encoding:
|
||||
assert DocValuesType.values().length == 6;
|
||||
}
|
||||
|
||||
private static byte docValuesByte(DocValuesType type) {
|
||||
switch(type) {
|
||||
case NONE:
|
||||
return 0;
|
||||
case NUMERIC:
|
||||
return 1;
|
||||
case BINARY:
|
||||
return 2;
|
||||
case SORTED:
|
||||
return 3;
|
||||
case SORTED_SET:
|
||||
return 4;
|
||||
case SORTED_NUMERIC:
|
||||
return 5;
|
||||
default:
|
||||
// BUG
|
||||
throw new AssertionError("unhandled DocValuesType: " + type);
|
||||
}
|
||||
}
|
||||
|
||||
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
|
||||
switch(b) {
|
||||
case 0:
|
||||
return DocValuesType.NONE;
|
||||
case 1:
|
||||
return DocValuesType.NUMERIC;
|
||||
case 2:
|
||||
return DocValuesType.BINARY;
|
||||
case 3:
|
||||
return DocValuesType.SORTED;
|
||||
case 4:
|
||||
return DocValuesType.SORTED_SET;
|
||||
case 5:
|
||||
return DocValuesType.SORTED_NUMERIC;
|
||||
default:
|
||||
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
|
||||
}
|
||||
}
|
||||
|
||||
static {
|
||||
// We "mirror" IndexOptions enum values with the constants below; let's try to ensure if we add a new IndexOption while this format is
|
||||
// still used for writing, we remember to fix this encoding:
|
||||
assert IndexOptions.values().length == 5;
|
||||
}
|
||||
|
||||
private static byte indexOptionsByte(IndexOptions indexOptions) {
|
||||
switch (indexOptions) {
|
||||
case NONE:
|
||||
return 0;
|
||||
case DOCS:
|
||||
return 1;
|
||||
case DOCS_AND_FREQS:
|
||||
return 2;
|
||||
case DOCS_AND_FREQS_AND_POSITIONS:
|
||||
return 3;
|
||||
case DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS:
|
||||
return 4;
|
||||
default:
|
||||
// BUG:
|
||||
throw new AssertionError("unhandled IndexOptions: " + indexOptions);
|
||||
}
|
||||
}
|
||||
|
||||
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException {
|
||||
switch (b) {
|
||||
case 0:
|
||||
return IndexOptions.NONE;
|
||||
case 1:
|
||||
return IndexOptions.DOCS;
|
||||
case 2:
|
||||
return IndexOptions.DOCS_AND_FREQS;
|
||||
case 3:
|
||||
return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
case 4:
|
||||
return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||
default:
|
||||
// BUG
|
||||
throw new CorruptIndexException("invalid IndexOptions byte: " + b, input);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
|
||||
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
|
||||
try (IndexOutput output = directory.createOutput(fileName, context)) {
|
||||
CodecUtil.writeIndexHeader(output, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
|
||||
output.writeVInt(infos.size());
|
||||
for (FieldInfo fi : infos) {
|
||||
fi.checkConsistency();
|
||||
|
||||
output.writeString(fi.name);
|
||||
output.writeVInt(fi.number);
|
||||
|
||||
byte bits = 0x0;
|
||||
if (fi.hasVectors()) bits |= STORE_TERMVECTOR;
|
||||
if (fi.omitsNorms()) bits |= OMIT_NORMS;
|
||||
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
|
||||
output.writeByte(bits);
|
||||
|
||||
output.writeByte(indexOptionsByte(fi.getIndexOptions()));
|
||||
|
||||
// pack the DV type and hasNorms in one byte
|
||||
output.writeByte(docValuesByte(fi.getDocValuesType()));
|
||||
output.writeLong(fi.getDocValuesGen());
|
||||
output.writeMapOfStrings(fi.attributes());
|
||||
}
|
||||
CodecUtil.writeFooter(output);
|
||||
}
|
||||
}
|
||||
|
||||
/** Extension of field infos */
|
||||
static final String EXTENSION = "fnm";
|
||||
|
||||
// Codec header
|
||||
static final String CODEC_NAME = "Lucene50FieldInfos";
|
||||
static final int FORMAT_SAFE_MAPS = 1;
|
||||
static final int FORMAT_START = FORMAT_SAFE_MAPS;
|
||||
static final int FORMAT_CURRENT = FORMAT_SAFE_MAPS;
|
||||
|
||||
// Field flags
|
||||
static final byte STORE_TERMVECTOR = 0x1;
|
||||
static final byte OMIT_NORMS = 0x2;
|
||||
static final byte STORE_PAYLOADS = 0x4;
|
||||
}
|
|
@@ -29,6 +29,7 @@ import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
@@ -46,21 +47,21 @@ import org.apache.lucene.util.FixedBitSet;
 * </ul>
 */
public final class Lucene50LiveDocsFormat extends LiveDocsFormat {

/** extension of live docs */
private static final String EXTENSION = "liv";

/** codec of live docs */
private static final String CODEC_NAME = "Lucene50LiveDocs";

/** supported version range */
private static final int VERSION_START = 0;
private static final int VERSION_CURRENT = VERSION_START;

/** Sole constructor. */
public Lucene50LiveDocsFormat() {
}

/** extension of live docs */
private static final String EXTENSION = "liv";

/** codec of live docs */
private static final String CODEC_NAME = "Lucene50LiveDocs";

/** supported version range */
private static final int VERSION_START = 0;
private static final int VERSION_CURRENT = VERSION_START;

@Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
long gen = info.getDelGen();
@@ -71,11 +72,9 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
try {
CodecUtil.checkIndexHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT,
info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
long data[] = new long[FixedBitSet.bits2words(length)];
for (int i = 0; i < data.length; i++) {
data[i] = input.readLong();
}
FixedBitSet fbs = new FixedBitSet(data, length);

FixedBitSet fbs = readFixedBitSet(input, length);

if (fbs.length() - fbs.cardinality() != info.getDelCount()) {
throw new CorruptIndexException("bits.deleted=" + (fbs.length() - fbs.cardinality()) +
" info.delcount=" + info.getDelCount(), input);
@@ -89,26 +88,26 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
}
throw new AssertionError();
}

private FixedBitSet readFixedBitSet(IndexInput input, int length) throws IOException {
long data[] = new long[FixedBitSet.bits2words(length)];
for (int i = 0; i < data.length; i++) {
data[i] = input.readLong();
}
return new FixedBitSet(data, length);
}

@Override
public void writeLiveDocs(Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException {
long gen = info.getNextDelGen();
String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen);
int delCount = 0;
int delCount;
try (IndexOutput output = dir.createOutput(name, context)) {

CodecUtil.writeIndexHeader(output, CODEC_NAME, VERSION_CURRENT, info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
final int longCount = FixedBitSet.bits2words(bits.length());
for (int i = 0; i < longCount; ++i) {
long currentBits = 0;
for (int j = i << 6, end = Math.min(j + 63, bits.length() - 1); j <= end; ++j) {
if (bits.get(j)) {
currentBits |= 1L << j; // mod 64
} else {
delCount += 1;
}
}
output.writeLong(currentBits);
}

delCount = writeBits(output, bits);

CodecUtil.writeFooter(output);
}
if (delCount != info.getDelCount() + newDelCount) {
@@ -117,6 +116,23 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
}
}

private int writeBits(IndexOutput output, Bits bits) throws IOException {
int delCount = 0;
final int longCount = FixedBitSet.bits2words(bits.length());
for (int i = 0; i < longCount; ++i) {
long currentBits = 0;
for (int j = i << 6, end = Math.min(j + 63, bits.length() - 1); j <= end; ++j) {
if (bits.get(j)) {
currentBits |= 1L << j; // mod 64
} else {
delCount += 1;
}
}
output.writeLong(currentBits);
}
return delCount;
}

@Override
public void files(SegmentCommitInfo info, Collection<String> files) throws IOException {
if (info.hasDeletions()) {
@@ -104,6 +104,22 @@ import org.apache.lucene.store.IndexOutput;
 */
public final class Lucene60FieldInfosFormat extends FieldInfosFormat {

/** Extension of field infos */
static final String EXTENSION = "fnm";

// Codec header
static final String CODEC_NAME = "Lucene60FieldInfos";
static final int FORMAT_START = 0;
static final int FORMAT_SOFT_DELETES = 1;
static final int FORMAT_SELECTIVE_INDEXING = 2;
static final int FORMAT_CURRENT = FORMAT_SELECTIVE_INDEXING;

// Field flags
static final byte STORE_TERMVECTOR = 0x1;
static final byte OMIT_NORMS = 0x2;
static final byte STORE_PAYLOADS = 0x4;
static final byte SOFT_DELETES_FIELD = 0x8;

/** Sole constructor. */
public Lucene60FieldInfosFormat() {
}
@@ -113,7 +129,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
Throwable priorE = null;
FieldInfo infos[] = null;
FieldInfo[] infos = null;
try {
int version = CodecUtil.checkIndexHeader(input,
Lucene60FieldInfosFormat.CODEC_NAME,
@@ -121,56 +137,8 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
Lucene60FieldInfosFormat.FORMAT_CURRENT,
segmentInfo.getId(), segmentSuffix);

final int size = input.readVInt(); //read in the size
infos = new FieldInfo[size];

// previous field's attribute map, we share when possible:
Map<String,String> lastAttributes = Collections.emptyMap();

for (int i = 0; i < size; i++) {
String name = input.readString();
final int fieldNumber = input.readVInt();
if (fieldNumber < 0) {
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
}
byte bits = input.readByte();
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
boolean omitNorms = (bits & OMIT_NORMS) != 0;
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;

final IndexOptions indexOptions = getIndexOptions(input, input.readByte());

// DV Types are packed in one byte
final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
final long dvGen = input.readLong();
Map<String,String> attributes = input.readMapOfStrings();
// just use the last field's map if its the same
if (attributes.equals(lastAttributes)) {
attributes = lastAttributes;
}
lastAttributes = attributes;
int pointDataDimensionCount = input.readVInt();
int pointNumBytes;
int pointIndexDimensionCount = pointDataDimensionCount;
if (pointDataDimensionCount != 0) {
if (version >= Lucene60FieldInfosFormat.FORMAT_SELECTIVE_INDEXING) {
pointIndexDimensionCount = input.readVInt();
}
pointNumBytes = input.readVInt();
} else {
pointNumBytes = 0;
}

try {
infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads,
indexOptions, docValuesType, dvGen, attributes,
pointDataDimensionCount, pointIndexDimensionCount, pointNumBytes,
0, VectorValues.SearchStrategy.NONE, isSoftDeletesField);
} catch (IllegalStateException e) {
throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
}
}
infos = readFieldInfos(input, version);

} catch (Throwable exception) {
priorE = exception;
} finally {
@@ -179,6 +147,60 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
return new FieldInfos(infos);
}
}

private FieldInfo[] readFieldInfos(IndexInput input, int version) throws IOException {
final int size = input.readVInt(); //read in the size
FieldInfo[] infos = new FieldInfo[size];

// previous field's attribute map, we share when possible:
Map<String,String> lastAttributes = Collections.emptyMap();

for (int i = 0; i < size; i++) {
String name = input.readString();
final int fieldNumber = input.readVInt();
if (fieldNumber < 0) {
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
}
byte bits = input.readByte();
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
boolean omitNorms = (bits & OMIT_NORMS) != 0;
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
boolean isSoftDeletesField = (bits & SOFT_DELETES_FIELD) != 0;

final IndexOptions indexOptions = getIndexOptions(input, input.readByte());

// DV Types are packed in one byte
final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
final long dvGen = input.readLong();
Map<String,String> attributes = input.readMapOfStrings();
// just use the last field's map if its the same
if (attributes.equals(lastAttributes)) {
attributes = lastAttributes;
}
lastAttributes = attributes;
int pointDataDimensionCount = input.readVInt();
int pointNumBytes;
int pointIndexDimensionCount = pointDataDimensionCount;
if (pointDataDimensionCount != 0) {
if (version >= Lucene60FieldInfosFormat.FORMAT_SELECTIVE_INDEXING) {
pointIndexDimensionCount = input.readVInt();
}
pointNumBytes = input.readVInt();
} else {
pointNumBytes = 0;
}

try {
infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads,
indexOptions, docValuesType, dvGen, attributes,
pointDataDimensionCount, pointIndexDimensionCount, pointNumBytes,
0, VectorValues.SearchStrategy.NONE, isSoftDeletesField);
} catch (IllegalStateException e) {
throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
}
}
return infos;
}

static {
// We "mirror" DocValues enum values with the constants below; let's try to ensure if we add a new DocValuesType while this format is
@ -301,20 +323,4 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
|
|||
CodecUtil.writeFooter(output);
|
||||
}
|
||||
}
|
||||
|
||||
/** Extension of field infos */
|
||||
static final String EXTENSION = "fnm";
|
||||
|
||||
// Codec header
|
||||
static final String CODEC_NAME = "Lucene60FieldInfos";
|
||||
static final int FORMAT_START = 0;
|
||||
static final int FORMAT_SOFT_DELETES = 1;
|
||||
static final int FORMAT_SELECTIVE_INDEXING = 2;
|
||||
static final int FORMAT_CURRENT = FORMAT_SELECTIVE_INDEXING;
|
||||
|
||||
// Field flags
|
||||
static final byte STORE_TERMVECTOR = 0x1;
|
||||
static final byte OMIT_NORMS = 0x2;
|
||||
static final byte STORE_PAYLOADS = 0x4;
|
||||
static final byte SOFT_DELETES_FIELD = 0x8;
|
||||
}
|
||||
|
|
|
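Taken together, the two hunks above show the shape of the refactor in Lucene60FieldInfosFormat: the public read method keeps the header check and the checksum-aware footer handling, while the new readFieldInfos helper parses only the body of the .fnm file. A rough sketch of how the surrounding read method plausibly looks after this change; the header/footer lines are inferred from the usual CodecUtil pattern and are not shown verbatim in this diff:

  // Sketch only, not a verbatim copy of the commit: header/footer handling stays in
  // read(), content parsing is delegated to readFieldInfos().
  public FieldInfos read(Directory directory, SegmentInfo segmentInfo,
                         String segmentSuffix, IOContext context) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
    try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
      Throwable priorE = null;
      FieldInfo[] infos = null;
      try {
        int version = CodecUtil.checkIndexHeader(input, CODEC_NAME,
            FORMAT_START, FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
        infos = readFieldInfos(input, version);   // body only, as in the hunk above
      } catch (Throwable exception) {
        priorE = exception;
      } finally {
        CodecUtil.checkFooter(input, priorE);     // verifies the checksum, rethrows priorE
      }
      return new FieldInfos(infos);
    }
  }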
@@ -78,7 +78,9 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
                                             Lucene80DocValuesFormat.VERSION_CURRENT,
                                             state.segmentInfo.getId(),
                                             state.segmentSuffix);

        readFields(in, state.fieldInfos);

      } catch (Throwable exception) {
        priorE = exception;
      } finally {

@@ -113,7 +115,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
    }
  }

  private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
  private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
    for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
      FieldInfo info = infos.fieldInfo(fieldNumber);
      if (info == null) {

@@ -136,13 +138,13 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
    }
  }

  private NumericEntry readNumeric(ChecksumIndexInput meta) throws IOException {
  private NumericEntry readNumeric(IndexInput meta) throws IOException {
    NumericEntry entry = new NumericEntry();
    readNumeric(meta, entry);
    return entry;
  }

  private void readNumeric(ChecksumIndexInput meta, NumericEntry entry) throws IOException {
  private void readNumeric(IndexInput meta, NumericEntry entry) throws IOException {
    entry.docsWithFieldOffset = meta.readLong();
    entry.docsWithFieldLength = meta.readLong();
    entry.jumpTableEntryCount = meta.readShort();

@@ -172,7 +174,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
      entry.valueJumpTableOffset = meta.readLong();
    }

  private BinaryEntry readBinary(ChecksumIndexInput meta) throws IOException {
  private BinaryEntry readBinary(IndexInput meta) throws IOException {
    BinaryEntry entry = new BinaryEntry();
    if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {
      int b = meta.readByte();

@@ -218,7 +220,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
    return entry;
  }

  private SortedEntry readSorted(ChecksumIndexInput meta) throws IOException {
  private SortedEntry readSorted(IndexInput meta) throws IOException {
    SortedEntry entry = new SortedEntry();
    entry.docsWithFieldOffset = meta.readLong();
    entry.docsWithFieldLength = meta.readLong();

@@ -232,7 +234,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
    return entry;
  }

  private SortedSetEntry readSortedSet(ChecksumIndexInput meta) throws IOException {
  private SortedSetEntry readSortedSet(IndexInput meta) throws IOException {
    SortedSetEntry entry = new SortedSetEntry();
    byte multiValued = meta.readByte();
    switch (multiValued) {

@@ -261,7 +263,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
    return entry;
  }

  private static void readTermDict(ChecksumIndexInput meta, TermsDictEntry entry) throws IOException {
  private static void readTermDict(IndexInput meta, TermsDictEntry entry) throws IOException {
    entry.termsDictSize = meta.readVLong();
    entry.termsDictBlockShift = meta.readInt();
    final int blockShift = meta.readInt();

@@ -281,7 +283,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
    entry.termsIndexAddressesLength = meta.readLong();
  }

  private SortedNumericEntry readSortedNumeric(ChecksumIndexInput meta) throws IOException {
  private SortedNumericEntry readSortedNumeric(IndexInput meta) throws IOException {
    SortedNumericEntry entry = new SortedNumericEntry();
    readNumeric(meta, entry);
    entry.numDocsWithField = meta.readInt();
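Every hunk in the Lucene80DocValuesProducer change above makes the same edit: the metadata readers now accept the plain IndexInput base type instead of ChecksumIndexInput, so the parsing code no longer assumes it is reading the checksummed stream; footer verification stays with the caller that opened the file. A small illustrative sketch, where readEntries and the file name "example.dvm" are hypothetical stand-ins rather than code from the commit:

  // Because ChecksumIndexInput extends IndexInput, a parser declared against the wider
  // type can be fed either stream; checksum verification remains the caller's job.
  void readEntries(IndexInput meta) throws IOException {
    long offset = meta.readLong();   // pure content parsing, no header/footer handling
    long length = meta.readLong();
    // ... further entry fields ...
  }

  void openAndRead(Directory dir, IOContext ctx) throws IOException {
    try (ChecksumIndexInput in = dir.openChecksumInput("example.dvm", ctx)) {
      readEntries(in);               // checksummed stream: the caller checks the footer
    }
    try (IndexInput in = dir.openInput("example.dvm", ctx)) {
      readEntries(in);               // a plain stream works just as well
    }
  }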
@@ -81,6 +81,12 @@ import org.apache.lucene.util.Version;
 */
public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {

  /** File extension used to store {@link SegmentInfo}. */
  public final static String SI_EXTENSION = "si";
  static final String CODEC_NAME = "Lucene86SegmentInfo";
  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = VERSION_START;

  /** Sole constructor. */
  public Lucene86SegmentInfoFormat() {
  }

@@ -96,47 +102,9 @@ public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {
                                       VERSION_START,
                                       VERSION_CURRENT,
                                       segmentID, "");
        final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
        byte hasMinVersion = input.readByte();
        final Version minVersion;
        switch (hasMinVersion) {
          case 0:
            minVersion = null;
            break;
          case 1:
            minVersion = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
            break;
          default:
            throw new CorruptIndexException("Illegal boolean value " + hasMinVersion, input);
        }

        final int docCount = input.readInt();
        if (docCount < 0) {
          throw new CorruptIndexException("invalid docCount: " + docCount, input);
        }
        final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;

        final Map<String,String> diagnostics = input.readMapOfStrings();
        final Set<String> files = input.readSetOfStrings();
        final Map<String,String> attributes = input.readMapOfStrings();

        int numSortFields = input.readVInt();
        Sort indexSort;
        if (numSortFields > 0) {
          SortField[] sortFields = new SortField[numSortFields];
          for(int i=0;i<numSortFields;i++) {
            String name = input.readString();
            sortFields[i] = SortFieldProvider.forName(name).readSortField(input);
          }
          indexSort = new Sort(sortFields);
        } else if (numSortFields < 0) {
          throw new CorruptIndexException("invalid index sort field count: " + numSortFields, input);
        } else {
          indexSort = null;
        }

        si = new SegmentInfo(dir, version, minVersion, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
        si.setFiles(files);

        si = parseSegmentInfo(dir, input, segment, segmentID);

      } catch (Throwable exception) {
        priorE = exception;
      } finally {

@@ -146,6 +114,51 @@ public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {
    }
  }

  private SegmentInfo parseSegmentInfo(Directory dir, DataInput input, String segment, byte[] segmentID) throws IOException {
    final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
    byte hasMinVersion = input.readByte();
    final Version minVersion;
    switch (hasMinVersion) {
      case 0:
        minVersion = null;
        break;
      case 1:
        minVersion = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
        break;
      default:
        throw new CorruptIndexException("Illegal boolean value " + hasMinVersion, input);
    }

    final int docCount = input.readInt();
    if (docCount < 0) {
      throw new CorruptIndexException("invalid docCount: " + docCount, input);
    }
    final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;

    final Map<String,String> diagnostics = input.readMapOfStrings();
    final Set<String> files = input.readSetOfStrings();
    final Map<String,String> attributes = input.readMapOfStrings();

    int numSortFields = input.readVInt();
    Sort indexSort;
    if (numSortFields > 0) {
      SortField[] sortFields = new SortField[numSortFields];
      for(int i=0;i<numSortFields;i++) {
        String name = input.readString();
        sortFields[i] = SortFieldProvider.forName(name).readSortField(input);
      }
      indexSort = new Sort(sortFields);
    } else if (numSortFields < 0) {
      throw new CorruptIndexException("invalid index sort field count: " + numSortFields, input);
    } else {
      indexSort = null;
    }

    SegmentInfo si = new SegmentInfo(dir, version, minVersion, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
    si.setFiles(files);
    return si;
  }

  @Override
  public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(si.name, "", SI_EXTENSION);

@@ -153,65 +166,60 @@ public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {
    try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
      // Only add the file once we've successfully created it, else IFD assert can trip:
      si.addFile(fileName);
      CodecUtil.writeIndexHeader(output,
                                 CODEC_NAME,
                                 VERSION_CURRENT,
                                 si.getId(),
                                 "");
      Version version = si.getVersion();
      if (version.major < 7) {
        throw new IllegalArgumentException("invalid major version: should be >= 7 but got: " + version.major + " segment=" + si);
      }
      // Write the Lucene version that created this segment, since 3.1
      output.writeInt(version.major);
      output.writeInt(version.minor);
      output.writeInt(version.bugfix);
      CodecUtil.writeIndexHeader(output, CODEC_NAME, VERSION_CURRENT, si.getId(), "");

      // Write the min Lucene version that contributed docs to the segment, since 7.0
      if (si.getMinVersion() != null) {
        output.writeByte((byte) 1);
        Version minVersion = si.getMinVersion();
        output.writeInt(minVersion.major);
        output.writeInt(minVersion.minor);
        output.writeInt(minVersion.bugfix);
      } else {
        output.writeByte((byte) 0);
      }

      assert version.prerelease == 0;
      output.writeInt(si.maxDoc());

      output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
      output.writeMapOfStrings(si.getDiagnostics());
      Set<String> files = si.files();
      for (String file : files) {
        if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
          throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
        }
      }
      output.writeSetOfStrings(files);
      output.writeMapOfStrings(si.getAttributes());

      Sort indexSort = si.getIndexSort();
      int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
      output.writeVInt(numSortFields);
      for (int i = 0; i < numSortFields; ++i) {
        SortField sortField = indexSort.getSort()[i];
        IndexSorter sorter = sortField.getIndexSorter();
        if (sorter == null) {
          throw new IllegalArgumentException("cannot serialize SortField " + sortField);
        }
        output.writeString(sorter.getProviderName());
        SortFieldProvider.write(sortField, output);
      }
      writeSegmentInfo(output, si);

      CodecUtil.writeFooter(output);
    }
  }

  /** File extension used to store {@link SegmentInfo}. */
  public final static String SI_EXTENSION = "si";
  static final String CODEC_NAME = "Lucene86SegmentInfo";
  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = VERSION_START;
  private void writeSegmentInfo(DataOutput output, SegmentInfo si) throws IOException {
    Version version = si.getVersion();
    if (version.major < 7) {
      throw new IllegalArgumentException("invalid major version: should be >= 7 but got: " + version.major + " segment=" + si);
    }
    // Write the Lucene version that created this segment, since 3.1
    output.writeInt(version.major);
    output.writeInt(version.minor);
    output.writeInt(version.bugfix);

    // Write the min Lucene version that contributed docs to the segment, since 7.0
    if (si.getMinVersion() != null) {
      output.writeByte((byte) 1);
      Version minVersion = si.getMinVersion();
      output.writeInt(minVersion.major);
      output.writeInt(minVersion.minor);
      output.writeInt(minVersion.bugfix);
    } else {
      output.writeByte((byte) 0);
    }

    assert version.prerelease == 0;
    output.writeInt(si.maxDoc());

    output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
    output.writeMapOfStrings(si.getDiagnostics());
    Set<String> files = si.files();
    for (String file : files) {
      if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
        throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
      }
    }
    output.writeSetOfStrings(files);
    output.writeMapOfStrings(si.getAttributes());

    Sort indexSort = si.getIndexSort();
    int numSortFields = indexSort == null ? 0 : indexSort.getSort().length;
    output.writeVInt(numSortFields);
    for (int i = 0; i < numSortFields; ++i) {
      SortField sortField = indexSort.getSort()[i];
      IndexSorter sorter = sortField.getIndexSorter();
      if (sorter == null) {
        throw new IllegalArgumentException("cannot serialize SortField " + sortField);
      }
      output.writeString(sorter.getProviderName());
      SortFieldProvider.write(sortField, output);
    }
  }
}
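With the Lucene86SegmentInfoFormat hunks above, reading and writing become symmetric: read() wraps parseSegmentInfo between checkIndexHeader and checkFooter, and write() wraps writeSegmentInfo between writeIndexHeader and writeFooter. Condensed from the diff above (the delegated body is omitted; this is a summary, not additional code in the commit):

  @Override
  public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
    final String fileName = IndexFileNames.segmentFileName(si.name, "", SI_EXTENSION);
    try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
      // Only add the file once we've successfully created it, else IFD assert can trip:
      si.addFile(fileName);
      CodecUtil.writeIndexHeader(output, CODEC_NAME, VERSION_CURRENT, si.getId(), "");
      writeSegmentInfo(output, si);   // versions, maxDoc, files, diagnostics, attributes, index sort
      CodecUtil.writeFooter(output);
    }
  }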
@@ -336,104 +336,9 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
      infos.generation = generation;
      infos.lastGeneration = generation;
      infos.luceneVersion = luceneVersion;

      infos.version = input.readLong();
      //System.out.println("READ sis version=" + infos.version);
      if (format > VERSION_70) {
        infos.counter = input.readVLong();
      } else {
        infos.counter = input.readInt();
      }
      int numSegments = input.readInt();
      if (numSegments < 0) {
        throw new CorruptIndexException("invalid segment count: " + numSegments, input);
      }

      if (numSegments > 0) {
        infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
      } else {
        // else leave as null: no segments
      }

      long totalDocs = 0;
      for (int seg = 0; seg < numSegments; seg++) {
        String segName = input.readString();
        byte[] segmentID = new byte[StringHelper.ID_LENGTH];
        input.readBytes(segmentID, 0, segmentID.length);
        Codec codec = readCodec(input);
        SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
        info.setCodec(codec);
        totalDocs += info.maxDoc();
        long delGen = input.readLong();
        int delCount = input.readInt();
        if (delCount < 0 || delCount > info.maxDoc()) {
          throw new CorruptIndexException("invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
        }
        long fieldInfosGen = input.readLong();
        long dvGen = input.readLong();
        int softDelCount = format > VERSION_72 ? input.readInt() : 0;
        if (softDelCount < 0 || softDelCount > info.maxDoc()) {
          throw new CorruptIndexException("invalid deletion count: " + softDelCount + " vs maxDoc=" + info.maxDoc(), input);
        }
        if (softDelCount + delCount > info.maxDoc()) {
          throw new CorruptIndexException("invalid deletion count: " + (softDelCount + delCount) + " vs maxDoc=" + info.maxDoc(), input);
        }
        final byte[] sciId;
        if (format > VERSION_74) {
          byte marker = input.readByte();
          switch (marker) {
            case 1:
              sciId = new byte[StringHelper.ID_LENGTH];
              input.readBytes(sciId, 0, sciId.length);
              break;
            case 0:
              sciId = null;
              break;
            default:
              throw new CorruptIndexException("invalid SegmentCommitInfo ID marker: " + marker, input);
          }
        } else {
          sciId = null;
        }
        SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen, sciId);
        siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
        final Map<Integer,Set<String>> dvUpdateFiles;
        final int numDVFields = input.readInt();
        if (numDVFields == 0) {
          dvUpdateFiles = Collections.emptyMap();
        } else {
          Map<Integer,Set<String>> map = new HashMap<>(numDVFields);
          for (int i = 0; i < numDVFields; i++) {
            map.put(input.readInt(), input.readSetOfStrings());
          }
          dvUpdateFiles = Collections.unmodifiableMap(map);
        }
        siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
        infos.add(siPerCommit);

        Version segmentVersion = info.getVersion();

        if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
          throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input);
        }

        if (infos.indexCreatedVersionMajor >= 7 && segmentVersion.major < infos.indexCreatedVersionMajor) {
          throw new CorruptIndexException("segments file recorded indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor + " but segment=" + info + " has older version=" + segmentVersion, input);
        }

        if (infos.indexCreatedVersionMajor >= 7 && info.getMinVersion() == null) {
          throw new CorruptIndexException("segments infos must record minVersion with indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor, input);
        }
      }

      infos.userData = input.readMapOfStrings();

      // LUCENE-6299: check we are in bounds
      if (totalDocs > IndexWriter.getActualMaxDocs()) {
        throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input);
      }

      parseSegmentInfos(directory, input, infos, format);
      return infos;

    } catch (Throwable t) {
      priorE = t;
    } finally {

@@ -446,6 +351,104 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
    throw new Error("Unreachable code");
  }

  private static void parseSegmentInfos(Directory directory, DataInput input, SegmentInfos infos, int format) throws IOException {
    infos.version = input.readLong();
    //System.out.println("READ sis version=" + infos.version);
    if (format > VERSION_70) {
      infos.counter = input.readVLong();
    } else {
      infos.counter = input.readInt();
    }
    int numSegments = input.readInt();
    if (numSegments < 0) {
      throw new CorruptIndexException("invalid segment count: " + numSegments, input);
    }

    if (numSegments > 0) {
      infos.minSegmentLuceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
    } else {
      // else leave as null: no segments
    }

    long totalDocs = 0;
    for (int seg = 0; seg < numSegments; seg++) {
      String segName = input.readString();
      byte[] segmentID = new byte[StringHelper.ID_LENGTH];
      input.readBytes(segmentID, 0, segmentID.length);
      Codec codec = readCodec(input);
      SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, segmentID, IOContext.READ);
      info.setCodec(codec);
      totalDocs += info.maxDoc();
      long delGen = input.readLong();
      int delCount = input.readInt();
      if (delCount < 0 || delCount > info.maxDoc()) {
        throw new CorruptIndexException("invalid deletion count: " + delCount + " vs maxDoc=" + info.maxDoc(), input);
      }
      long fieldInfosGen = input.readLong();
      long dvGen = input.readLong();
      int softDelCount = format > VERSION_72 ? input.readInt() : 0;
      if (softDelCount < 0 || softDelCount > info.maxDoc()) {
        throw new CorruptIndexException("invalid deletion count: " + softDelCount + " vs maxDoc=" + info.maxDoc(), input);
      }
      if (softDelCount + delCount > info.maxDoc()) {
        throw new CorruptIndexException("invalid deletion count: " + (softDelCount + delCount) + " vs maxDoc=" + info.maxDoc(), input);
      }
      final byte[] sciId;
      if (format > VERSION_74) {
        byte marker = input.readByte();
        switch (marker) {
          case 1:
            sciId = new byte[StringHelper.ID_LENGTH];
            input.readBytes(sciId, 0, sciId.length);
            break;
          case 0:
            sciId = null;
            break;
          default:
            throw new CorruptIndexException("invalid SegmentCommitInfo ID marker: " + marker, input);
        }
      } else {
        sciId = null;
      }
      SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen, sciId);
      siPerCommit.setFieldInfosFiles(input.readSetOfStrings());
      final Map<Integer,Set<String>> dvUpdateFiles;
      final int numDVFields = input.readInt();
      if (numDVFields == 0) {
        dvUpdateFiles = Collections.emptyMap();
      } else {
        Map<Integer,Set<String>> map = new HashMap<>(numDVFields);
        for (int i = 0; i < numDVFields; i++) {
          map.put(input.readInt(), input.readSetOfStrings());
        }
        dvUpdateFiles = Collections.unmodifiableMap(map);
      }
      siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles);
      infos.add(siPerCommit);

      Version segmentVersion = info.getVersion();

      if (segmentVersion.onOrAfter(infos.minSegmentLuceneVersion) == false) {
        throw new CorruptIndexException("segments file recorded minSegmentLuceneVersion=" + infos.minSegmentLuceneVersion + " but segment=" + info + " has older version=" + segmentVersion, input);
      }

      if (infos.indexCreatedVersionMajor >= 7 && segmentVersion.major < infos.indexCreatedVersionMajor) {
        throw new CorruptIndexException("segments file recorded indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor + " but segment=" + info + " has older version=" + segmentVersion, input);
      }

      if (infos.indexCreatedVersionMajor >= 7 && info.getMinVersion() == null) {
        throw new CorruptIndexException("segments infos must record minVersion with indexCreatedVersionMajor=" + infos.indexCreatedVersionMajor, input);
      }
    }

    infos.userData = input.readMapOfStrings();

    // LUCENE-6299: check we are in bounds
    if (totalDocs > IndexWriter.getActualMaxDocs()) {
      throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input);
    }
  }

  private static Codec readCodec(DataInput input) throws IOException {
    final String name = input.readString();
    try {
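SegmentInfos follows the same pattern: parsing the body of the segments_N file moves into the static parseSegmentInfos helper, which only needs a DataInput, while readCommit keeps the header, generation, and checksum handling. A hypothetical calling sketch; readCommit's real header handling is outside this diff, and the checkFooter line is an assumption about the surrounding code:

  Throwable priorE = null;
  try {
    // 'format' was already extracted while validating the header
    parseSegmentInfos(directory, input, infos, format);
    return infos;
  } catch (Throwable t) {
    priorE = t;
  } finally {
    CodecUtil.checkFooter(input, priorE);   // any parse failure surfaces here, after checksum verification
  }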