LUCENE-9627: Remove unused Lucene50FieldInfosFormat codec and slightly refactor some codecs to separate reading the header/footer from reading the content of the file

This commit is contained in:
Ignacio Vera 2020-12-15 10:13:25 +01:00 committed by GitHub
parent 94c69f4385
commit 4b3e8d7ce8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 731 additions and 967 deletions

View File

@ -220,6 +220,9 @@ Other
* LUCENE-9544: add regenerate gradle script for nori dictionary (Namgyu Kim) * LUCENE-9544: add regenerate gradle script for nori dictionary (Namgyu Kim)
* LUCENE-9627: Remove unused Lucene50FieldInfosFormat codec and slightly refactor some codecs
  to separate reading the header/footer from reading the content of the file. (Ignacio Vera)
======================= Lucene 8.8.0 ======================= ======================= Lucene 8.8.0 =======================
New Features New Features

View File

@ -34,6 +34,7 @@ import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField; import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
@ -80,6 +81,12 @@ import org.apache.lucene.util.Version;
*/ */
public class Lucene70SegmentInfoFormat extends SegmentInfoFormat { public class Lucene70SegmentInfoFormat extends SegmentInfoFormat {
/** File extension used to store {@link SegmentInfo}. */
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene70SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
/** Sole constructor. */ /** Sole constructor. */
public Lucene70SegmentInfoFormat() { public Lucene70SegmentInfoFormat() {
} }
@ -95,6 +102,19 @@ public class Lucene70SegmentInfoFormat extends SegmentInfoFormat {
Lucene70SegmentInfoFormat.VERSION_START, Lucene70SegmentInfoFormat.VERSION_START,
Lucene70SegmentInfoFormat.VERSION_CURRENT, Lucene70SegmentInfoFormat.VERSION_CURRENT,
segmentID, ""); segmentID, "");
si = parseSegmentInfo(dir, input, segment, segmentID);
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return si;
}
}
private SegmentInfo parseSegmentInfo(Directory dir, DataInput input, String segment, byte[] segmentID) throws IOException {
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt()); final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
byte hasMinVersion = input.readByte(); byte hasMinVersion = input.readByte();
final Version minVersion; final Version minVersion;
@ -257,25 +277,13 @@ public class Lucene70SegmentInfoFormat extends SegmentInfoFormat {
indexSort = null; indexSort = null;
} }
si = new SegmentInfo(dir, version, minVersion, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort); SegmentInfo si = new SegmentInfo(dir, version, minVersion, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
si.setFiles(files); si.setFiles(files);
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return si; return si;
} }
}
@Override @Override
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
throw new UnsupportedOperationException("Old formats can't be used for writing"); throw new UnsupportedOperationException("Old formats can't be used for writing");
} }
/** File extension used to store {@link SegmentInfo}. */
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene70SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
} }

View File

@ -51,6 +51,14 @@ public class Lucene70RWSegmentInfoFormat extends Lucene70SegmentInfoFormat {
Lucene70SegmentInfoFormat.VERSION_CURRENT, Lucene70SegmentInfoFormat.VERSION_CURRENT,
si.getId(), si.getId(),
""); "");
writeSegmentInfo(output, si);
CodecUtil.writeFooter(output);
}
}
private void writeSegmentInfo(IndexOutput output, SegmentInfo si) throws IOException {
Version version = si.getVersion(); Version version = si.getVersion();
if (version.major < 7) { if (version.major < 7) {
throw new IllegalArgumentException("invalid major version: should be >= 7 but got: " + version.major + " segment=" + si); throw new IllegalArgumentException("invalid major version: should be >= 7 but got: " + version.major + " segment=" + si);
@ -196,9 +204,5 @@ public class Lucene70RWSegmentInfoFormat extends Lucene70SegmentInfoFormat {
} }
} }
} }
CodecUtil.writeFooter(output);
} }
}
} }

View File

@ -62,6 +62,15 @@ import org.apache.lucene.store.IndexOutput;
*/ */
public final class Lucene50CompoundFormat extends CompoundFormat { public final class Lucene50CompoundFormat extends CompoundFormat {
/** Extension of compound file */
static final String DATA_EXTENSION = "cfs";
/** Extension of compound file entries */
static final String ENTRIES_EXTENSION = "cfe";
static final String DATA_CODEC = "Lucene50CompoundData";
static final String ENTRY_CODEC = "Lucene50CompoundEntries";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
/** Sole constructor. */ /** Sole constructor. */
public Lucene50CompoundFormat() { public Lucene50CompoundFormat() {
} }
@ -81,10 +90,17 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), ""); CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), "");
CodecUtil.writeIndexHeader(entries, ENTRY_CODEC, VERSION_CURRENT, si.getId(), ""); CodecUtil.writeIndexHeader(entries, ENTRY_CODEC, VERSION_CURRENT, si.getId(), "");
writeCompoundFile(entries, data, dir, si);
CodecUtil.writeFooter(data);
CodecUtil.writeFooter(entries);
}
}
private void writeCompoundFile(IndexOutput entries, IndexOutput data, Directory dir, SegmentInfo si) throws IOException {
// write number of files // write number of files
entries.writeVInt(si.files().size()); entries.writeVInt(si.files().size());
for (String file : si.files()) { for (String file : si.files()) {
// write bytes for file // write bytes for file
long startOffset = data.getFilePointer(); long startOffset = data.getFilePointer();
try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) { try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) {
@ -114,18 +130,5 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
entries.writeLong(startOffset); entries.writeLong(startOffset);
entries.writeLong(length); entries.writeLong(length);
} }
CodecUtil.writeFooter(data);
CodecUtil.writeFooter(entries);
} }
}
/** Extension of compound file */
static final String DATA_EXTENSION = "cfs";
/** Extension of compound file entries */
static final String ENTRIES_EXTENSION = "cfe";
static final String DATA_CODEC = "Lucene50CompoundData";
static final String ENTRY_CODEC = "Lucene50CompoundEntries";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
} }

View File

@ -100,15 +100,28 @@ final class Lucene50CompoundReader extends CompoundDirectory {
/** Helper method that reads CFS entries from an input stream */ /** Helper method that reads CFS entries from an input stream */
private Map<String, FileEntry> readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException { private Map<String, FileEntry> readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
Map<String,FileEntry> mapping = null; Map<String, FileEntry> mapping = null;
try (ChecksumIndexInput entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE)) { try (ChecksumIndexInput entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE)) {
Throwable priorE = null; Throwable priorE = null;
try { try {
version = CodecUtil.checkIndexHeader(entriesStream, Lucene50CompoundFormat.ENTRY_CODEC, version = CodecUtil.checkIndexHeader(entriesStream, Lucene50CompoundFormat.ENTRY_CODEC,
Lucene50CompoundFormat.VERSION_START, Lucene50CompoundFormat.VERSION_START,
Lucene50CompoundFormat.VERSION_CURRENT, segmentID, ""); Lucene50CompoundFormat.VERSION_CURRENT, segmentID, "");
mapping = readMapping(entriesStream);
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(entriesStream, priorE);
}
}
return Collections.unmodifiableMap(mapping);
}
private Map<String,FileEntry> readMapping(IndexInput entriesStream) throws IOException {
final int numEntries = entriesStream.readVInt(); final int numEntries = entriesStream.readVInt();
mapping = new HashMap<>(numEntries); Map<String,FileEntry> mapping = new HashMap<>(numEntries);
for (int i = 0; i < numEntries; i++) { for (int i = 0; i < numEntries; i++) {
final FileEntry fileEntry = new FileEntry(); final FileEntry fileEntry = new FileEntry();
final String id = entriesStream.readString(); final String id = entriesStream.readString();
@ -119,13 +132,7 @@ final class Lucene50CompoundReader extends CompoundDirectory {
fileEntry.offset = entriesStream.readLong(); fileEntry.offset = entriesStream.readLong();
fileEntry.length = entriesStream.readLong(); fileEntry.length = entriesStream.readLong();
} }
} catch (Throwable exception) { return mapping;
priorE = exception;
} finally {
CodecUtil.checkFooter(entriesStream, priorE);
}
}
return Collections.unmodifiableMap(mapping);
} }
@Override @Override

View File

@ -1,296 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene50;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
/**
* Lucene 5.0 Field Infos format.
* <p>Field names are stored in the field info file, with suffix <code>.fnm</code>.
* <p>FieldInfos (.fnm) --&gt; Header,FieldsCount, &lt;FieldName,FieldNumber,
* FieldBits,DocValuesBits,DocValuesGen,Attributes&gt; <sup>FieldsCount</sup>,Footer
* <p>Data types:
* <ul>
* <li>Header --&gt; {@link CodecUtil#checkIndexHeader IndexHeader}</li>
* <li>FieldsCount --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>FieldName --&gt; {@link DataOutput#writeString String}</li>
* <li>FieldBits, IndexOptions, DocValuesBits --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>FieldNumber --&gt; {@link DataOutput#writeInt VInt}</li>
* <li>Attributes --&gt; {@link DataOutput#writeMapOfStrings Map&lt;String,String&gt;}</li>
* <li>DocValuesGen --&gt; {@link DataOutput#writeLong(long) Int64}</li>
* <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
* Field Descriptions:
* <ul>
* <li>FieldsCount: the number of fields in this file.</li>
* <li>FieldName: name of the field as a UTF-8 String.</li>
* <li>FieldNumber: the field's number. Note that unlike previous versions of
* Lucene, the fields are not numbered implicitly by their order in the
* file, instead explicitly.</li>
* <li>FieldBits: a byte containing field options.
* <ul>
* <li>The low order bit (0x1) is one for fields that have term vectors
* stored, and zero for fields without term vectors.</li>
* <li>If the second lowest order-bit is set (0x2), norms are omitted for the
* indexed field.</li>
* <li>If the third lowest-order bit is set (0x4), payloads are stored for the
* indexed field.</li>
* </ul>
* </li>
* <li>IndexOptions: a byte containing index options.
* <ul>
* <li>0: not indexed</li>
* <li>1: indexed as DOCS_ONLY</li>
* <li>2: indexed as DOCS_AND_FREQS</li>
* <li>3: indexed as DOCS_AND_FREQS_AND_POSITIONS</li>
* <li>4: indexed as DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS</li>
* </ul>
* </li>
* <li>DocValuesBits: a byte containing per-document value types. The type
* recorded as two four-bit integers, with the high-order bits representing
* <code>norms</code> options, and the low-order bits representing
* {@code DocValues} options. Each four-bit integer can be decoded as such:
* <ul>
* <li>0: no DocValues for this field.</li>
* <li>1: NumericDocValues. ({@link DocValuesType#NUMERIC})</li>
* <li>2: BinaryDocValues. ({@code DocValuesType#BINARY})</li>
* <li>3: SortedDocValues. ({@code DocValuesType#SORTED})</li>
* </ul>
* </li>
* <li>DocValuesGen is the generation count of the field's DocValues. If this is -1,
* there are no DocValues updates to that field. Anything above zero means there
* are updates stored by {@link DocValuesFormat}.</li>
* <li>Attributes: a key-value map of codec-private attributes.</li>
* </ul>
*
* @lucene.experimental
*/
public final class Lucene50FieldInfosFormat extends FieldInfosFormat {
/** Sole constructor. */
public Lucene50FieldInfosFormat() {
}
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
Throwable priorE = null;
FieldInfo infos[] = null;
try {
CodecUtil.checkIndexHeader(input, Lucene50FieldInfosFormat.CODEC_NAME,
Lucene50FieldInfosFormat.FORMAT_START,
Lucene50FieldInfosFormat.FORMAT_CURRENT,
segmentInfo.getId(), segmentSuffix);
final int size = input.readVInt(); //read in the size
infos = new FieldInfo[size];
// previous field's attribute map, we share when possible:
Map<String,String> lastAttributes = Collections.emptyMap();
for (int i = 0; i < size; i++) {
String name = input.readString();
final int fieldNumber = input.readVInt();
if (fieldNumber < 0) {
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
}
byte bits = input.readByte();
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
boolean omitNorms = (bits & OMIT_NORMS) != 0;
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
final IndexOptions indexOptions = getIndexOptions(input, input.readByte());
// DV Types are packed in one byte
final DocValuesType docValuesType = getDocValuesType(input, input.readByte());
final long dvGen = input.readLong();
Map<String,String> attributes = input.readMapOfStrings();
// just use the last field's map if its the same
if (attributes.equals(lastAttributes)) {
attributes = lastAttributes;
}
lastAttributes = attributes;
try {
infos[i] = new FieldInfo(name, fieldNumber, storeTermVector, omitNorms, storePayloads,
indexOptions, docValuesType, dvGen, attributes, 0, 0, 0,
0, VectorValues.SearchStrategy.NONE, false);
} catch (IllegalStateException e) {
throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
}
}
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return new FieldInfos(infos);
}
}
static {
// We "mirror" DocValues enum values with the constants below; let's try to ensure if we add a new DocValuesType while this format is
// still used for writing, we remember to fix this encoding:
assert DocValuesType.values().length == 6;
}
private static byte docValuesByte(DocValuesType type) {
switch(type) {
case NONE:
return 0;
case NUMERIC:
return 1;
case BINARY:
return 2;
case SORTED:
return 3;
case SORTED_SET:
return 4;
case SORTED_NUMERIC:
return 5;
default:
// BUG
throw new AssertionError("unhandled DocValuesType: " + type);
}
}
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
switch(b) {
case 0:
return DocValuesType.NONE;
case 1:
return DocValuesType.NUMERIC;
case 2:
return DocValuesType.BINARY;
case 3:
return DocValuesType.SORTED;
case 4:
return DocValuesType.SORTED_SET;
case 5:
return DocValuesType.SORTED_NUMERIC;
default:
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
}
}
static {
// We "mirror" IndexOptions enum values with the constants below; let's try to ensure if we add a new IndexOption while this format is
// still used for writing, we remember to fix this encoding:
assert IndexOptions.values().length == 5;
}
private static byte indexOptionsByte(IndexOptions indexOptions) {
switch (indexOptions) {
case NONE:
return 0;
case DOCS:
return 1;
case DOCS_AND_FREQS:
return 2;
case DOCS_AND_FREQS_AND_POSITIONS:
return 3;
case DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS:
return 4;
default:
// BUG:
throw new AssertionError("unhandled IndexOptions: " + indexOptions);
}
}
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException {
switch (b) {
case 0:
return IndexOptions.NONE;
case 1:
return IndexOptions.DOCS;
case 2:
return IndexOptions.DOCS_AND_FREQS;
case 3:
return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
case 4:
return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
default:
// BUG
throw new CorruptIndexException("invalid IndexOptions byte: " + b, input);
}
}
@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
try (IndexOutput output = directory.createOutput(fileName, context)) {
CodecUtil.writeIndexHeader(output, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
output.writeVInt(infos.size());
for (FieldInfo fi : infos) {
fi.checkConsistency();
output.writeString(fi.name);
output.writeVInt(fi.number);
byte bits = 0x0;
if (fi.hasVectors()) bits |= STORE_TERMVECTOR;
if (fi.omitsNorms()) bits |= OMIT_NORMS;
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
output.writeByte(bits);
output.writeByte(indexOptionsByte(fi.getIndexOptions()));
// pack the DV type and hasNorms in one byte
output.writeByte(docValuesByte(fi.getDocValuesType()));
output.writeLong(fi.getDocValuesGen());
output.writeMapOfStrings(fi.attributes());
}
CodecUtil.writeFooter(output);
}
}
/** Extension of field infos */
static final String EXTENSION = "fnm";
// Codec header
static final String CODEC_NAME = "Lucene50FieldInfos";
static final int FORMAT_SAFE_MAPS = 1;
static final int FORMAT_START = FORMAT_SAFE_MAPS;
static final int FORMAT_CURRENT = FORMAT_SAFE_MAPS;
// Field flags
static final byte STORE_TERMVECTOR = 0x1;
static final byte OMIT_NORMS = 0x2;
static final byte STORE_PAYLOADS = 0x4;
}

View File

@ -29,6 +29,7 @@ import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
@ -47,10 +48,6 @@ import org.apache.lucene.util.FixedBitSet;
*/ */
public final class Lucene50LiveDocsFormat extends LiveDocsFormat { public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
/** Sole constructor. */
public Lucene50LiveDocsFormat() {
}
/** extension of live docs */ /** extension of live docs */
private static final String EXTENSION = "liv"; private static final String EXTENSION = "liv";
@ -61,6 +58,10 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
private static final int VERSION_START = 0; private static final int VERSION_START = 0;
private static final int VERSION_CURRENT = VERSION_START; private static final int VERSION_CURRENT = VERSION_START;
/** Sole constructor. */
public Lucene50LiveDocsFormat() {
}
@Override @Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException { public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
long gen = info.getDelGen(); long gen = info.getDelGen();
@ -71,11 +72,9 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
try { try {
CodecUtil.checkIndexHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT, CodecUtil.checkIndexHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT,
info.info.getId(), Long.toString(gen, Character.MAX_RADIX)); info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
long data[] = new long[FixedBitSet.bits2words(length)];
for (int i = 0; i < data.length; i++) { FixedBitSet fbs = readFixedBitSet(input, length);
data[i] = input.readLong();
}
FixedBitSet fbs = new FixedBitSet(data, length);
if (fbs.length() - fbs.cardinality() != info.getDelCount()) { if (fbs.length() - fbs.cardinality() != info.getDelCount()) {
throw new CorruptIndexException("bits.deleted=" + (fbs.length() - fbs.cardinality()) + throw new CorruptIndexException("bits.deleted=" + (fbs.length() - fbs.cardinality()) +
" info.delcount=" + info.getDelCount(), input); " info.delcount=" + info.getDelCount(), input);
@ -90,13 +89,35 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
throw new AssertionError(); throw new AssertionError();
} }
private FixedBitSet readFixedBitSet(IndexInput input, int length) throws IOException {
long data[] = new long[FixedBitSet.bits2words(length)];
for (int i = 0; i < data.length; i++) {
data[i] = input.readLong();
}
return new FixedBitSet(data, length);
}
@Override @Override
public void writeLiveDocs(Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException { public void writeLiveDocs(Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException {
long gen = info.getNextDelGen(); long gen = info.getNextDelGen();
String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen); String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen);
int delCount = 0; int delCount;
try (IndexOutput output = dir.createOutput(name, context)) { try (IndexOutput output = dir.createOutput(name, context)) {
CodecUtil.writeIndexHeader(output, CODEC_NAME, VERSION_CURRENT, info.info.getId(), Long.toString(gen, Character.MAX_RADIX)); CodecUtil.writeIndexHeader(output, CODEC_NAME, VERSION_CURRENT, info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
delCount = writeBits(output, bits);
CodecUtil.writeFooter(output);
}
if (delCount != info.getDelCount() + newDelCount) {
throw new CorruptIndexException("bits.deleted=" + delCount +
" info.delcount=" + info.getDelCount() + " newdelcount=" + newDelCount, name);
}
}
private int writeBits(IndexOutput output, Bits bits) throws IOException {
int delCount = 0;
final int longCount = FixedBitSet.bits2words(bits.length()); final int longCount = FixedBitSet.bits2words(bits.length());
for (int i = 0; i < longCount; ++i) { for (int i = 0; i < longCount; ++i) {
long currentBits = 0; long currentBits = 0;
@ -109,12 +130,7 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
} }
output.writeLong(currentBits); output.writeLong(currentBits);
} }
CodecUtil.writeFooter(output); return delCount;
}
if (delCount != info.getDelCount() + newDelCount) {
throw new CorruptIndexException("bits.deleted=" + delCount +
" info.delcount=" + info.getDelCount() + " newdelcount=" + newDelCount, name);
}
} }
@Override @Override

View File

@ -104,6 +104,22 @@ import org.apache.lucene.store.IndexOutput;
*/ */
public final class Lucene60FieldInfosFormat extends FieldInfosFormat { public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
/** Extension of field infos */
static final String EXTENSION = "fnm";
// Codec header
static final String CODEC_NAME = "Lucene60FieldInfos";
static final int FORMAT_START = 0;
static final int FORMAT_SOFT_DELETES = 1;
static final int FORMAT_SELECTIVE_INDEXING = 2;
static final int FORMAT_CURRENT = FORMAT_SELECTIVE_INDEXING;
// Field flags
static final byte STORE_TERMVECTOR = 0x1;
static final byte OMIT_NORMS = 0x2;
static final byte STORE_PAYLOADS = 0x4;
static final byte SOFT_DELETES_FIELD = 0x8;
/** Sole constructor. */ /** Sole constructor. */
public Lucene60FieldInfosFormat() { public Lucene60FieldInfosFormat() {
} }
@ -113,7 +129,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION); final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) { try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
Throwable priorE = null; Throwable priorE = null;
FieldInfo infos[] = null; FieldInfo[] infos = null;
try { try {
int version = CodecUtil.checkIndexHeader(input, int version = CodecUtil.checkIndexHeader(input,
Lucene60FieldInfosFormat.CODEC_NAME, Lucene60FieldInfosFormat.CODEC_NAME,
@ -121,8 +137,20 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
Lucene60FieldInfosFormat.FORMAT_CURRENT, Lucene60FieldInfosFormat.FORMAT_CURRENT,
segmentInfo.getId(), segmentSuffix); segmentInfo.getId(), segmentSuffix);
infos = readFieldInfos(input, version);
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return new FieldInfos(infos);
}
}
private FieldInfo[] readFieldInfos(IndexInput input, int version) throws IOException {
final int size = input.readVInt(); //read in the size final int size = input.readVInt(); //read in the size
infos = new FieldInfo[size]; FieldInfo[] infos = new FieldInfo[size];
// previous field's attribute map, we share when possible: // previous field's attribute map, we share when possible:
Map<String,String> lastAttributes = Collections.emptyMap(); Map<String,String> lastAttributes = Collections.emptyMap();
@ -171,13 +199,7 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e); throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
} }
} }
} catch (Throwable exception) { return infos;
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return new FieldInfos(infos);
}
} }
static { static {
@ -301,20 +323,4 @@ public final class Lucene60FieldInfosFormat extends FieldInfosFormat {
CodecUtil.writeFooter(output); CodecUtil.writeFooter(output);
} }
} }
/** Extension of field infos */
static final String EXTENSION = "fnm";
// Codec header
static final String CODEC_NAME = "Lucene60FieldInfos";
static final int FORMAT_START = 0;
static final int FORMAT_SOFT_DELETES = 1;
static final int FORMAT_SELECTIVE_INDEXING = 2;
static final int FORMAT_CURRENT = FORMAT_SELECTIVE_INDEXING;
// Field flags
static final byte STORE_TERMVECTOR = 0x1;
static final byte OMIT_NORMS = 0x2;
static final byte STORE_PAYLOADS = 0x4;
static final byte SOFT_DELETES_FIELD = 0x8;
} }

View File

@ -78,7 +78,9 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
Lucene80DocValuesFormat.VERSION_CURRENT, Lucene80DocValuesFormat.VERSION_CURRENT,
state.segmentInfo.getId(), state.segmentInfo.getId(),
state.segmentSuffix); state.segmentSuffix);
readFields(in, state.fieldInfos); readFields(in, state.fieldInfos);
} catch (Throwable exception) { } catch (Throwable exception) {
priorE = exception; priorE = exception;
} finally { } finally {
@ -113,7 +115,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
} }
} }
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException { private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
FieldInfo info = infos.fieldInfo(fieldNumber); FieldInfo info = infos.fieldInfo(fieldNumber);
if (info == null) { if (info == null) {
@ -136,13 +138,13 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
} }
} }
private NumericEntry readNumeric(ChecksumIndexInput meta) throws IOException { private NumericEntry readNumeric(IndexInput meta) throws IOException {
NumericEntry entry = new NumericEntry(); NumericEntry entry = new NumericEntry();
readNumeric(meta, entry); readNumeric(meta, entry);
return entry; return entry;
} }
private void readNumeric(ChecksumIndexInput meta, NumericEntry entry) throws IOException { private void readNumeric(IndexInput meta, NumericEntry entry) throws IOException {
entry.docsWithFieldOffset = meta.readLong(); entry.docsWithFieldOffset = meta.readLong();
entry.docsWithFieldLength = meta.readLong(); entry.docsWithFieldLength = meta.readLong();
entry.jumpTableEntryCount = meta.readShort(); entry.jumpTableEntryCount = meta.readShort();
@ -172,7 +174,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
entry.valueJumpTableOffset = meta.readLong(); entry.valueJumpTableOffset = meta.readLong();
} }
private BinaryEntry readBinary(ChecksumIndexInput meta) throws IOException { private BinaryEntry readBinary(IndexInput meta) throws IOException {
BinaryEntry entry = new BinaryEntry(); BinaryEntry entry = new BinaryEntry();
if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) { if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {
int b = meta.readByte(); int b = meta.readByte();
@ -218,7 +220,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
return entry; return entry;
} }
private SortedEntry readSorted(ChecksumIndexInput meta) throws IOException { private SortedEntry readSorted(IndexInput meta) throws IOException {
SortedEntry entry = new SortedEntry(); SortedEntry entry = new SortedEntry();
entry.docsWithFieldOffset = meta.readLong(); entry.docsWithFieldOffset = meta.readLong();
entry.docsWithFieldLength = meta.readLong(); entry.docsWithFieldLength = meta.readLong();
@ -232,7 +234,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
return entry; return entry;
} }
private SortedSetEntry readSortedSet(ChecksumIndexInput meta) throws IOException { private SortedSetEntry readSortedSet(IndexInput meta) throws IOException {
SortedSetEntry entry = new SortedSetEntry(); SortedSetEntry entry = new SortedSetEntry();
byte multiValued = meta.readByte(); byte multiValued = meta.readByte();
switch (multiValued) { switch (multiValued) {
@ -261,7 +263,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
return entry; return entry;
} }
private static void readTermDict(ChecksumIndexInput meta, TermsDictEntry entry) throws IOException { private static void readTermDict(IndexInput meta, TermsDictEntry entry) throws IOException {
entry.termsDictSize = meta.readVLong(); entry.termsDictSize = meta.readVLong();
entry.termsDictBlockShift = meta.readInt(); entry.termsDictBlockShift = meta.readInt();
final int blockShift = meta.readInt(); final int blockShift = meta.readInt();
@ -281,7 +283,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer implements Close
entry.termsIndexAddressesLength = meta.readLong(); entry.termsIndexAddressesLength = meta.readLong();
} }
private SortedNumericEntry readSortedNumeric(ChecksumIndexInput meta) throws IOException { private SortedNumericEntry readSortedNumeric(IndexInput meta) throws IOException {
SortedNumericEntry entry = new SortedNumericEntry(); SortedNumericEntry entry = new SortedNumericEntry();
readNumeric(meta, entry); readNumeric(meta, entry);
entry.numDocsWithField = meta.readInt(); entry.numDocsWithField = meta.readInt();

View File

@ -81,6 +81,12 @@ import org.apache.lucene.util.Version;
*/ */
public class Lucene86SegmentInfoFormat extends SegmentInfoFormat { public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {
/** File extension used to store {@link SegmentInfo}. */
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene86SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
/** Sole constructor. */ /** Sole constructor. */
public Lucene86SegmentInfoFormat() { public Lucene86SegmentInfoFormat() {
} }
@ -96,6 +102,19 @@ public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {
VERSION_START, VERSION_START,
VERSION_CURRENT, VERSION_CURRENT,
segmentID, ""); segmentID, "");
si = parseSegmentInfo(dir, input, segment, segmentID);
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return si;
}
}
private SegmentInfo parseSegmentInfo(Directory dir, DataInput input, String segment, byte[] segmentID) throws IOException {
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt()); final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
byte hasMinVersion = input.readByte(); byte hasMinVersion = input.readByte();
final Version minVersion; final Version minVersion;
@ -135,16 +154,10 @@ public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {
indexSort = null; indexSort = null;
} }
si = new SegmentInfo(dir, version, minVersion, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort); SegmentInfo si = new SegmentInfo(dir, version, minVersion, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
si.setFiles(files); si.setFiles(files);
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return si; return si;
} }
}
@Override @Override
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
@ -153,11 +166,15 @@ public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {
try (IndexOutput output = dir.createOutput(fileName, ioContext)) { try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
// Only add the file once we've successfully created it, else IFD assert can trip: // Only add the file once we've successfully created it, else IFD assert can trip:
si.addFile(fileName); si.addFile(fileName);
CodecUtil.writeIndexHeader(output, CodecUtil.writeIndexHeader(output, CODEC_NAME, VERSION_CURRENT, si.getId(), "");
CODEC_NAME,
VERSION_CURRENT, writeSegmentInfo(output, si);
si.getId(),
""); CodecUtil.writeFooter(output);
}
}
private void writeSegmentInfo(DataOutput output, SegmentInfo si) throws IOException {
Version version = si.getVersion(); Version version = si.getVersion();
if (version.major < 7) { if (version.major < 7) {
throw new IllegalArgumentException("invalid major version: should be >= 7 but got: " + version.major + " segment=" + si); throw new IllegalArgumentException("invalid major version: should be >= 7 but got: " + version.major + " segment=" + si);
@ -204,14 +221,5 @@ public class Lucene86SegmentInfoFormat extends SegmentInfoFormat {
output.writeString(sorter.getProviderName()); output.writeString(sorter.getProviderName());
SortFieldProvider.write(sortField, output); SortFieldProvider.write(sortField, output);
} }
CodecUtil.writeFooter(output);
} }
}
/** File extension used to store {@link SegmentInfo}. */
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene86SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
} }

View File

@ -336,7 +336,22 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
infos.generation = generation; infos.generation = generation;
infos.lastGeneration = generation; infos.lastGeneration = generation;
infos.luceneVersion = luceneVersion; infos.luceneVersion = luceneVersion;
parseSegmentInfos(directory, input, infos, format);
return infos;
} catch (Throwable t) {
priorE = t;
} finally {
if (format >= VERSION_70) { // oldest supported version
CodecUtil.checkFooter(input, priorE);
} else {
throw IOUtils.rethrowAlways(priorE);
}
}
throw new Error("Unreachable code");
}
private static void parseSegmentInfos(Directory directory, DataInput input, SegmentInfos infos, int format) throws IOException {
infos.version = input.readLong(); infos.version = input.readLong();
//System.out.println("READ sis version=" + infos.version); //System.out.println("READ sis version=" + infos.version);
if (format > VERSION_70) { if (format > VERSION_70) {
@ -432,18 +447,6 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
if (totalDocs > IndexWriter.getActualMaxDocs()) { if (totalDocs > IndexWriter.getActualMaxDocs()) {
throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input); throw new CorruptIndexException("Too many documents: an index cannot exceed " + IndexWriter.getActualMaxDocs() + " but readers have total maxDoc=" + totalDocs, input);
} }
return infos;
} catch (Throwable t) {
priorE = t;
} finally {
if (format >= VERSION_70) { // oldest supported version
CodecUtil.checkFooter(input, priorE);
} else {
throw IOUtils.rethrowAlways(priorE);
}
}
throw new Error("Unreachable code");
} }
private static Codec readCodec(DataInput input) throws IOException { private static Codec readCodec(DataInput input) throws IOException {