Mirror of https://github.com/apache/lucene.git (synced 2025-02-09 03:25:15 +00:00)

Commit 88b483cbbd — LUCENE-4051: Use Codec File Headers for Lucene40 StoredFields, DocValues, Norms & TermVectors

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1341768 13f79535-47bb-0310-9956-ffa450edef68

Parent: 8c037c2115
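A note on the mechanism this commit adopts: CodecUtil.writeHeader stamps a file with a magic int, the codec name, and a version int, and CodecUtil.checkHeader validates all three on open, so a truncated, swapped, or too-old/too-new file fails fast instead of being parsed as garbage. A minimal round-trip sketch; the directory path, file name, and codec name "DemoCodec" are hypothetical, not part of this commit:

    import java.io.File;
    import org.apache.lucene.store.*;
    import org.apache.lucene.util.CodecUtil;

    public class CodecHeaderDemo {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("/tmp/demo")); // hypothetical location
        IndexOutput out = dir.createOutput("example.dat", IOContext.DEFAULT);
        CodecUtil.writeHeader(out, "DemoCodec", 0);              // magic + codec name + version
        // the header length depends only on the codec name, so offsets are precomputable:
        assert out.getFilePointer() == CodecUtil.headerLength("DemoCodec");
        out.close();

        IndexInput in = dir.openInput("example.dat", IOContext.DEFAULT);
        // throws CorruptIndexException (or IndexFormatTooOld/NewException) on mismatch:
        int version = CodecUtil.checkHeader(in, "DemoCodec", 0, 0);
        System.out.println("header version = " + version);
        in.close();
      }
    }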
Lucene40StoredFieldsFormat.java

@@ -28,6 +28,7 @@ import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.store.DataOutput; // javadocs
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.CodecUtil;
 
 /**
  * Lucene 4.0 Stored Fields Format.
@@ -42,7 +43,8 @@ import org.apache.lucene.store.IOContext;
  * <p>This contains, for each document, a pointer to its field data, as
  * follows:</p>
  * <ul>
- * <li>FieldIndex (.fdx) --> <FieldValuesPosition> <sup>SegSize</sup></li>
+ * <li>FieldIndex (.fdx) --> <Header>, <FieldValuesPosition> <sup>SegSize</sup></li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  * <li>FieldValuesPosition --> {@link DataOutput#writeLong Uint64}</li>
  * </ul>
  * </li>
@@ -50,7 +52,8 @@ import org.apache.lucene.store.IOContext;
  * <p><a name="field_data" id="field_data"></a>The field data, or <tt>.fdt</tt> file.</p>
  * <p>This contains the stored fields of each document, as follows:</p>
  * <ul>
- * <li>FieldData (.fdt) --> <DocFieldData> <sup>SegSize</sup></li>
+ * <li>FieldData (.fdt) --> <Header>, <DocFieldData> <sup>SegSize</sup></li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  * <li>DocFieldData --> FieldCount, <FieldNum, Bits, Value>
  * <sup>FieldCount</sup></li>
  * <li>FieldCount --> {@link DataOutput#writeVInt VInt}</li>
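Read together, the grammar above means a reader reaches document n's stored fields in two seeks: one into the fixed-width .fdx index, one into .fdt. A hedged sketch of that lookup (indexIn and fieldsIn are hypothetical IndexInputs over freshly opened .fdx and .fdt files):

    // .fdx: Header, then one Uint64 FieldValuesPosition per document.
    indexIn.seek(CodecUtil.headerLength("Lucene40StoredFieldsIndex") + n * 8L);
    long fieldDataPointer = indexIn.readLong(); // FieldValuesPosition for doc n
    // .fdt: Header, then one DocFieldData record per document.
    fieldsIn.seek(fieldDataPointer);
    int fieldCount = fieldsIn.readVInt();       // FieldCount of DocFieldData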
Lucene40StoredFieldsReader.java

@@ -30,11 +30,14 @@ import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
 
 import java.io.Closeable;
 import java.util.Set;
 
+import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.*;
+
 /**
  * Class responsible for access to stored document fields.
  * <p/>
@@ -44,8 +47,6 @@ import java.util.Set;
  * @lucene.internal
  */
 public final class Lucene40StoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
-  private final static int FORMAT_SIZE = 4;
-
   private final FieldInfos fieldInfos;
   private final IndexInput fieldsStream;
   private final IndexInput indexStream;
@@ -78,17 +79,15 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
     boolean success = false;
     fieldInfos = fn;
     try {
-      fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);
-      final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
+      fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
+      final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
       indexStream = d.openInput(indexStreamFN, context);
 
-      // its a 4.0 codec: so its not too-old, its corrupt.
-      // TODO: change this to CodecUtil.checkHeader
-      if (Lucene40StoredFieldsWriter.FORMAT_CURRENT != indexStream.readInt()) {
-        throw new CorruptIndexException("unexpected fdx header: " + indexStream);
-      }
-
-      final long indexSize = indexStream.length() - FORMAT_SIZE;
+      CodecUtil.checkHeader(indexStream, CODEC_NAME_IDX, VERSION_START, VERSION_CURRENT);
+      CodecUtil.checkHeader(fieldsStream, CODEC_NAME_DAT, VERSION_START, VERSION_CURRENT);
+      assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
+      assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
+      final long indexSize = indexStream.length() - HEADER_LENGTH_IDX;
       this.size = (int) (indexSize >> 3);
       // Verify two sources of "maxDoc" agree:
       if (this.size != si.docCount) {
@@ -135,7 +134,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   }
 
   private void seekIndex(int docID) throws IOException {
-    indexStream.seek(FORMAT_SIZE + docID * 8L);
+    indexStream.seek(HEADER_LENGTH_IDX + docID * 8L);
   }
 
   public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
@@ -148,7 +147,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
     FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
 
     int bits = fieldsStream.readByte() & 0xFF;
-    assert bits <= (Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK | Lucene40StoredFieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
+    assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
 
     switch(visitor.needsField(fieldInfo)) {
       case YES:
@@ -164,19 +163,19 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   }
 
   private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
-    final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
+    final int numeric = bits & FIELD_IS_NUMERIC_MASK;
     if (numeric != 0) {
       switch(numeric) {
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_INT:
+        case FIELD_IS_NUMERIC_INT:
           visitor.intField(info, fieldsStream.readInt());
           return;
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_LONG:
+        case FIELD_IS_NUMERIC_LONG:
           visitor.longField(info, fieldsStream.readLong());
           return;
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+        case FIELD_IS_NUMERIC_FLOAT:
          visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt()));
          return;
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+        case FIELD_IS_NUMERIC_DOUBLE:
          visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong()));
          return;
        default:
@@ -186,7 +185,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
     final int length = fieldsStream.readVInt();
     byte bytes[] = new byte[length];
     fieldsStream.readBytes(bytes, 0, length);
-    if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0) {
+    if ((bits & FIELD_IS_BINARY) != 0) {
       visitor.binaryField(info, bytes, 0, bytes.length);
     } else {
       visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
@@ -195,15 +194,15 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   }
 
   private void skipField(int bits) throws IOException {
-    final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
+    final int numeric = bits & FIELD_IS_NUMERIC_MASK;
     if (numeric != 0) {
       switch(numeric) {
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_INT:
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+        case FIELD_IS_NUMERIC_INT:
+        case FIELD_IS_NUMERIC_FLOAT:
          fieldsStream.readInt();
          return;
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_LONG:
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+        case FIELD_IS_NUMERIC_LONG:
+        case FIELD_IS_NUMERIC_DOUBLE:
          fieldsStream.readLong();
          return;
        default:
@@ -242,7 +241,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   }
 
   public static void files(SegmentInfo info, Set<String> files) throws IOException {
-    files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
-    files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
+    files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION));
+    files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION));
   }
 }
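The size bookkeeping in the constructor above is worth spelling out: once the header has a known, name-dependent length, the document count can be recovered from the index file size alone and cross-checked against the segment's docCount. A sketch of that arithmetic (the helper name is illustrative, not from the patch):

    // .fdx = [HEADER_LENGTH_IDX bytes of header][8 bytes per document],
    // so the reader can derive and verify maxDoc from the file length alone:
    static int fdxDocCount(long fdxFileLength, long headerLengthIdx) {
      return (int) ((fdxFileLength - headerLengthIdx) >> 3); // i.e. divide by 8
    }

For reference, CodecUtil.headerLength(codec) in Lucene 4.0 works out to 9 + codec.length() for ASCII names shorter than 128 characters: 4 magic bytes, a 1-byte length-prefixed name, and a 4-byte version.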
Lucene40StoredFieldsWriter.java

@@ -34,6 +34,7 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
 
 /**
@@ -62,16 +63,14 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
   // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
   // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
 
-  // (Happens to be the same as for now) Lucene 3.2: NumericFields are stored in binary format
-  static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
-
-  // NOTE: if you introduce a new format, make it 1 higher
-  // than the current one, and always change this if you
-  // switch to a new format!
-  static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
-
-  // when removing support for old versions, leave the last supported version here
-  static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
+  static final String CODEC_NAME_IDX = "Lucene40StoredFieldsIndex";
+  static final String CODEC_NAME_DAT = "Lucene40StoredFieldsData";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+  static final long HEADER_LENGTH_IDX = CodecUtil.headerLength(CODEC_NAME_IDX);
+  static final long HEADER_LENGTH_DAT = CodecUtil.headerLength(CODEC_NAME_DAT);
 
   /** Extension of stored fields file */
   public static final String FIELDS_EXTENSION = "fdt";
@@ -94,9 +93,10 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
       fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
       indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION), context);
 
-      fieldsStream.writeInt(FORMAT_CURRENT);
-      indexStream.writeInt(FORMAT_CURRENT);
+      CodecUtil.writeHeader(fieldsStream, CODEC_NAME_DAT, VERSION_CURRENT);
+      CodecUtil.writeHeader(indexStream, CODEC_NAME_IDX, VERSION_CURRENT);
+      assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
+      assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
       success = true;
     } finally {
       if (!success) {
@@ -209,7 +209,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
 
   @Override
   public void finish(int numDocs) throws IOException {
-    if (4+((long) numDocs)*8 != indexStream.getFilePointer())
+    if (HEADER_LENGTH_IDX+((long) numDocs)*8 != indexStream.getFilePointer())
       // This is most likely a bug in Sun JRE 1.6.0_04/_05;
       // we detect that the bug has struck, here, and
       // throw an exception to prevent the corruption from
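With the constants above, the writer's finish() guard becomes pure arithmetic: after numDocs documents the .fdx pointer must sit exactly at HEADER_LENGTH_IDX + numDocs * 8, and any drift signals the index-time corruption the comment describes. Assuming the 9 + codec.length() header arithmetic noted earlier, CODEC_NAME_IDX ("Lucene40StoredFieldsIndex", 25 chars) gives a 34-byte header, so for example:

    // Illustrative only: the finish() guard specialized to 1,000 documents.
    long expectedFP = CodecUtil.headerLength("Lucene40StoredFieldsIndex") + 1000L * 8;
    assert expectedFP == 34 + 8000; // 8,034 bytes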
Lucene40TermVectorsFormat.java

@@ -28,6 +28,7 @@ import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.store.DataOutput; // javadocs
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.CodecUtil;
 
 /**
  * Lucene 4.0 Term Vectors format.
@@ -38,10 +39,10 @@ import org.apache.lucene.store.IOContext;
  * <p>The Document Index or .tvx file.</p>
  * <p>For each document, this stores the offset into the document data (.tvd) and
  * field data (.tvf) files.</p>
- * <p>DocumentIndex (.tvx) --> TVXVersion<DocumentPosition,FieldPosition>
+ * <p>DocumentIndex (.tvx) --> Header,<DocumentPosition,FieldPosition>
  * <sup>NumDocs</sup></p>
  * <ul>
- * <li>TVXVersion --> {@link DataOutput#writeInt Int32} (<code>Lucene40TermVectorsReader.FORMAT_CURRENT</code>)</li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  * <li>DocumentPosition --> {@link DataOutput#writeLong UInt64} (offset in the .tvd file)</li>
  * <li>FieldPosition --> {@link DataOutput#writeLong UInt64} (offset in the .tvf file)</li>
  * </ul>
@@ -53,10 +54,10 @@ import org.apache.lucene.store.IOContext;
  * in the .tvf (Term Vector Fields) file.</p>
  * <p>The .tvd file is used to map out the fields that have term vectors stored
  * and where the field information is in the .tvf file.</p>
- * <p>Document (.tvd) --> TVDVersion<NumFields, FieldNums,
+ * <p>Document (.tvd) --> Header,<NumFields, FieldNums,
  * FieldPositions> <sup>NumDocs</sup></p>
  * <ul>
- * <li>TVDVersion --> {@link DataOutput#writeInt Int32} (<code>Lucene40TermVectorsReader.FORMAT_CURRENT</code>)</li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  * <li>NumFields --> {@link DataOutput#writeVInt VInt}</li>
  * <li>FieldNums --> <FieldNumDelta> <sup>NumFields</sup></li>
  * <li>FieldNumDelta --> {@link DataOutput#writeVInt VInt}</li>
@@ -69,10 +70,10 @@ import org.apache.lucene.store.IOContext;
  * <p>This file contains, for each field that has a term vector stored, a list of
  * the terms, their frequencies and, optionally, position and offset
  * information.</p>
- * <p>Field (.tvf) --> TVFVersion<NumTerms, Position/Offset, TermFreqs>
+ * <p>Field (.tvf) --> Header,<NumTerms, Position/Offset, TermFreqs>
  * <sup>NumFields</sup></p>
  * <ul>
- * <li>TVFVersion --> {@link DataOutput#writeInt Int32} (<code>Lucene40TermVectorsReader.FORMAT_CURRENT</code>)</li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 * <li>NumTerms --> {@link DataOutput#writeVInt VInt}</li>
 * <li>Position/Offset --> {@link DataOutput#writeByte Byte}</li>
 * <li>TermFreqs --> <TermText, TermFreq, Positions?, Offsets?>
Lucene40TermVectorsReader.java

@@ -33,8 +33,6 @@ import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexFormatTooNewException;
-import org.apache.lucene.index.IndexFormatTooOldException;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -43,8 +41,10 @@ import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
+
 
 /**
  * Lucene 4.0 Term Vectors reader.
  * <p>
@@ -54,22 +54,6 @@ import org.apache.lucene.util.IOUtils;
  */
 public class Lucene40TermVectorsReader extends TermVectorsReader {
 
-  // NOTE: if you make a new format, it must be larger than
-  // the current format
-
-  // Changed strings to UTF8 with length-in-bytes not length-in-chars
-  static final int FORMAT_UTF8_LENGTH_IN_BYTES = 4;
-
-  // NOTE: always change this if you switch to a new format!
-  // whenever you add a new format, make it 1 larger (positive version logic)!
-  static final int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
-
-  // when removing support for old versions, leave the last supported version here
-  static final int FORMAT_MINIMUM = FORMAT_UTF8_LENGTH_IN_BYTES;
-
-  //The size in bytes that the FORMAT_VERSION will take up at the beginning of each file
-  static final int FORMAT_SIZE = 4;
-
   static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
 
   static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
@@ -83,6 +67,17 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
   /** Extension of vectors index file */
   static final String VECTORS_INDEX_EXTENSION = "tvx";
 
+  static final String CODEC_NAME_FIELDS = "Lucene40TermVectorsFields";
+  static final String CODEC_NAME_DOCS = "Lucene40TermVectorsDocs";
+  static final String CODEC_NAME_INDEX = "Lucene40TermVectorsIndex";
+
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static final long HEADER_LENGTH_FIELDS = CodecUtil.headerLength(CODEC_NAME_FIELDS);
+  static final long HEADER_LENGTH_DOCS = CodecUtil.headerLength(CODEC_NAME_DOCS);
+  static final long HEADER_LENGTH_INDEX = CodecUtil.headerLength(CODEC_NAME_INDEX);
+
   private FieldInfos fieldInfos;
 
   private IndexInput tvx;
@@ -91,17 +86,15 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
   private int size;
   private int numTotalDocs;
 
-  private final int format;
-
   // used by clone
-  Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs, int format) {
+  Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs) {
     this.fieldInfos = fieldInfos;
     this.tvx = tvx;
     this.tvd = tvd;
     this.tvf = tvf;
     this.size = size;
     this.numTotalDocs = numTotalDocs;
-    this.format = format;
   }
 
   public Lucene40TermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
@@ -114,18 +107,21 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
     try {
       String idxName = IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
       tvx = d.openInput(idxName, context);
-      format = checkValidFormat(tvx);
+      final int tvxVersion = CodecUtil.checkHeader(tvx, CODEC_NAME_INDEX, VERSION_START, VERSION_CURRENT);
       String fn = IndexFileNames.segmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
       tvd = d.openInput(fn, context);
-      final int tvdFormat = checkValidFormat(tvd);
+      final int tvdVersion = CodecUtil.checkHeader(tvd, CODEC_NAME_DOCS, VERSION_START, VERSION_CURRENT);
       fn = IndexFileNames.segmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
       tvf = d.openInput(fn, context);
-      final int tvfFormat = checkValidFormat(tvf);
-
-      assert format == tvdFormat;
-      assert format == tvfFormat;
-
-      numTotalDocs = (int) (tvx.length() >> 4);
+      final int tvfVersion = CodecUtil.checkHeader(tvf, CODEC_NAME_FIELDS, VERSION_START, VERSION_CURRENT);
+      assert HEADER_LENGTH_INDEX == tvx.getFilePointer();
+      assert HEADER_LENGTH_DOCS == tvd.getFilePointer();
+      assert HEADER_LENGTH_FIELDS == tvf.getFilePointer();
+      assert tvxVersion == tvdVersion;
+      assert tvxVersion == tvfVersion;
+
+      numTotalDocs = (int) (tvx.length()-HEADER_LENGTH_INDEX >> 4);
 
       this.size = numTotalDocs;
       assert size == 0 || numTotalDocs == size;
@@ -156,13 +152,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
 
   // Not private to avoid synthetic access$NNN methods
   void seekTvx(final int docNum) throws IOException {
-    tvx.seek(docNum * 16L + FORMAT_SIZE);
-  }
-
-  boolean canReadRawDocs() {
-    // we can always read raw docs, unless the term vectors
-    // didn't exist
-    return format != 0;
+    tvx.seek(docNum * 16L + HEADER_LENGTH_INDEX);
   }
 
   /** Retrieve the length (in bytes) of the tvd and tvf
@@ -210,16 +200,6 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
     }
   }
 
-  private int checkValidFormat(IndexInput in) throws CorruptIndexException, IOException
-  {
-    int format = in.readInt();
-    if (format < FORMAT_MINIMUM)
-      throw new IndexFormatTooOldException(in, format, FORMAT_MINIMUM, FORMAT_CURRENT);
-    if (format > FORMAT_CURRENT)
-      throw new IndexFormatTooNewException(in, format, FORMAT_MINIMUM, FORMAT_CURRENT);
-    return format;
-  }
-
   public void close() throws IOException {
     IOUtils.close(tvx, tvd, tvf);
   }
@@ -708,7 +688,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
       cloneTvf = (IndexInput) tvf.clone();
     }
 
-    return new Lucene40TermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, format);
+    return new Lucene40TermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs);
   }
 
   public static void files(SegmentInfo info, Set<String> files) throws IOException {
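The term-vectors index uses the same fixed-stride trick with a 16-byte record: per document, one UInt64 pointer into .tvd and one into .tvf. That is where both the >> 4 in numTotalDocs and the docNum * 16L in seekTvx come from. A sketch (helper names are illustrative):

    // .tvx layout after this commit: [CodecHeader][per doc: tvdPointer(8) + tvfPointer(8)]
    static long tvxOffset(long headerLengthIndex, int docNum) {
      return headerLengthIndex + docNum * 16L;
    }
    static int tvxDocCount(long tvxFileLength, long headerLengthIndex) {
      return (int) ((tvxFileLength - headerLengthIndex) >> 4); // i.e. divide by 16
    }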
Lucene40TermVectorsWriter.java

@@ -35,9 +35,13 @@ import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
 
+import static org.apache.lucene.codecs.lucene40.Lucene40TermVectorsReader.*;
+
 
 // TODO: make a new 4.0 TV format that encodes better
 // - use startOffset (not endOffset) as base for delta on
 // next startOffset because today for syns or ngrams or
@@ -59,6 +63,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
   private final String segment;
   private IndexOutput tvx = null, tvd = null, tvf = null;
 
   public Lucene40TermVectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
     this.directory = directory;
     this.segment = segment;
@@ -66,11 +72,14 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
     try {
       // Open files for TermVector storage
       tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_INDEX_EXTENSION), context);
-      tvx.writeInt(Lucene40TermVectorsReader.FORMAT_CURRENT);
+      CodecUtil.writeHeader(tvx, CODEC_NAME_INDEX, VERSION_CURRENT);
       tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), context);
-      tvd.writeInt(Lucene40TermVectorsReader.FORMAT_CURRENT);
+      CodecUtil.writeHeader(tvd, CODEC_NAME_DOCS, VERSION_CURRENT);
       tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_FIELDS_EXTENSION), context);
-      tvf.writeInt(Lucene40TermVectorsReader.FORMAT_CURRENT);
+      CodecUtil.writeHeader(tvf, CODEC_NAME_FIELDS, VERSION_CURRENT);
+      assert HEADER_LENGTH_INDEX == tvx.getFilePointer();
+      assert HEADER_LENGTH_DOCS == tvd.getFilePointer();
+      assert HEADER_LENGTH_FIELDS == tvf.getFilePointer();
       success = true;
     } finally {
       if (!success) {
@@ -252,12 +261,9 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
         TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();
 
         if (vectorsReader != null && vectorsReader instanceof Lucene40TermVectorsReader) {
-          // If the TV* files are an older format then they cannot read raw docs:
-          if (((Lucene40TermVectorsReader)vectorsReader).canReadRawDocs()) {
            matchingVectorsReader = (Lucene40TermVectorsReader) vectorsReader;
          }
        }
-      }
       if (reader.liveDocs != null) {
         numDocs += copyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
       } else {
@@ -356,7 +362,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
 
   @Override
   public void finish(int numDocs) throws IOException {
-    if (4+((long) numDocs)*16 != tvx.getFilePointer())
+    if (HEADER_LENGTH_INDEX+((long) numDocs)*16 != tvx.getFilePointer())
       // This is most likely a bug in Sun JRE 1.6.0_04/_05;
       // we detect that the bug has struck, here, and
       // throw an exception to prevent the corruption from
Bytes.java

@@ -236,27 +236,34 @@ public final class Bytes {
     private IndexOutput datOut;
     protected BytesRef bytesRef = new BytesRef();
     private final Directory dir;
-    private final String codecName;
+    private final String codecNameIdx;
+    private final String codecNameDat;
     private final int version;
     private final IOContext context;
 
-    protected BytesWriterBase(Directory dir, String id, String codecName,
+    protected BytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
         int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
       super(bytesUsed, type);
       this.id = id;
       this.dir = dir;
-      this.codecName = codecName;
+      this.codecNameIdx = codecNameIdx;
+      this.codecNameDat = codecNameDat;
       this.version = version;
       this.context = context;
+      assert codecNameDat != null || codecNameIdx != null: "both codec names are null";
+      assert (codecNameDat != null && !codecNameDat.equals(codecNameIdx))
+          || (codecNameIdx != null && !codecNameIdx.equals(codecNameDat)):
+          "index and data codec names must not be equal";
     }
 
     protected IndexOutput getOrCreateDataOut() throws IOException {
       if (datOut == null) {
         boolean success = false;
+        assert codecNameDat != null;
         try {
           datOut = dir.createOutput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
               DocValuesWriterBase.DATA_EXTENSION), context);
-          CodecUtil.writeHeader(datOut, codecName, version);
+          CodecUtil.writeHeader(datOut, codecNameDat, version);
           success = true;
         } finally {
           if (!success) {
@@ -279,9 +286,10 @@ public final class Bytes {
       boolean success = false;
       try {
         if (idxOut == null) {
+          assert codecNameIdx != null;
           idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
               DocValuesWriterBase.INDEX_EXTENSION), context);
-          CodecUtil.writeHeader(idxOut, codecName, version);
+          CodecUtil.writeHeader(idxOut, codecNameIdx, version);
         }
         success = true;
       } finally {
@@ -309,7 +317,7 @@ public final class Bytes {
     protected final String id;
     protected final Type type;
 
-    protected BytesReaderBase(Directory dir, String id, String codecName,
+    protected BytesReaderBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
         int maxVersion, boolean doIndex, IOContext context, Type type) throws IOException {
       IndexInput dataIn = null;
       IndexInput indexIn = null;
@@ -317,11 +325,11 @@ public final class Bytes {
       try {
         dataIn = dir.openInput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
             DocValuesWriterBase.DATA_EXTENSION), context);
-        version = CodecUtil.checkHeader(dataIn, codecName, maxVersion, maxVersion);
+        version = CodecUtil.checkHeader(dataIn, codecNameDat, maxVersion, maxVersion);
         if (doIndex) {
           indexIn = dir.openInput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
               DocValuesWriterBase.INDEX_EXTENSION), context);
-          final int version2 = CodecUtil.checkHeader(indexIn, codecName,
+          final int version2 = CodecUtil.checkHeader(indexIn, codecNameIdx,
               maxVersion, maxVersion);
           assert version == version2;
         }
@@ -377,23 +385,23 @@ public final class Bytes {
     protected final boolean fasterButMoreRam;
     protected long maxBytes = 0;
 
-    protected DerefBytesWriterBase(Directory dir, String id, String codecName,
+    protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
         int codecVersion, Counter bytesUsed, IOContext context, Type type)
         throws IOException {
-      this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
+      this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
          ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false, type);
     }
 
-    protected DerefBytesWriterBase(Directory dir, String id, String codecName,
+    protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
         int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type)
         throws IOException {
-      this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
+      this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
          ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam,type);
     }
 
-    protected DerefBytesWriterBase(Directory dir, String id, String codecName, int codecVersion, Allocator allocator,
+    protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, int codecVersion, Allocator allocator,
         Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) throws IOException {
-      super(dir, id, codecName, codecVersion, bytesUsed, context, type);
+      super(dir, id, codecNameIdx, codecNameDat, codecVersion, bytesUsed, context, type);
       hash = new BytesRefHash(new ByteBlockPool(allocator),
          BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
          BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
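Splitting the single codecName into codecNameIdx/codecNameDat gives each DocValues file its own stamp, so a mixed-up .idx/.dat pair is now rejected at open time instead of being misread. A hedged sketch of the failure mode (directory and file name hypothetical):

    // Suppose the data file was stamped with the *Dat codec name at write time...
    IndexInput in = dir.openInput("demo_0.dat", IOContext.DEFAULT);
    // ...then checking it against the *Idx name fails immediately:
    CodecUtil.checkHeader(in, "FixedDerefBytesIdx", 0, 0); // throws CorruptIndexException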
FixedDerefBytesImpl.java

@@ -39,14 +39,16 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 class FixedDerefBytesImpl {
 
-  static final String CODEC_NAME = "FixedDerefBytes";
+  static final String CODEC_NAME_IDX = "FixedDerefBytesIdx";
+  static final String CODEC_NAME_DAT = "FixedDerefBytesDat";
+
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
 
   public static class Writer extends DerefBytesWriterBase {
     public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
         throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_DEREF);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_DEREF);
     }
 
     @Override
@@ -71,7 +73,7 @@ class FixedDerefBytesImpl {
     private final int size;
     private final int numValuesStored;
     FixedDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_START, true, context, Type.BYTES_FIXED_DEREF);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, Type.BYTES_FIXED_DEREF);
       size = datIn.readInt();
       numValuesStored = idxIn.readInt();
     }
FixedSortedBytesImpl.java

@@ -49,7 +49,8 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 class FixedSortedBytesImpl {
 
-  static final String CODEC_NAME = "FixedSortedBytes";
+  static final String CODEC_NAME_IDX = "FixedSortedBytesIdx";
+  static final String CODEC_NAME_DAT = "FixedSortedBytesDat";
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
 
@@ -58,7 +59,7 @@ class FixedSortedBytesImpl {
 
     public Writer(Directory dir, String id, Comparator<BytesRef> comp,
         Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
       this.comp = comp;
     }
 
@@ -127,7 +128,7 @@ class FixedSortedBytesImpl {
 
     public Reader(Directory dir, String id, int maxDoc, IOContext context,
         Type type, Comparator<BytesRef> comparator) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, type);
       size = datIn.readInt();
       valueCount = idxIn.readInt();
       this.comparator = comparator;
FixedStraightBytesImpl.java

@@ -61,14 +61,14 @@ class FixedStraightBytesImpl {
     private final int byteBlockSize = BYTE_BLOCK_SIZE;
     private final ByteBlockPool pool;
 
-    protected FixedBytesWriterBase(Directory dir, String id, String codecName,
+    protected FixedBytesWriterBase(Directory dir, String id, String codecNameDat,
         int version, Counter bytesUsed, IOContext context) throws IOException {
-      this(dir, id, codecName, version, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
+      this(dir, id, codecNameDat, version, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
     }
 
-    protected FixedBytesWriterBase(Directory dir, String id, String codecName,
+    protected FixedBytesWriterBase(Directory dir, String id, String codecNameDat,
         int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
-      super(dir, id, codecName, version, bytesUsed, context, type);
+      super(dir, id, null, codecNameDat, version, bytesUsed, context, type);
       pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
       pool.nextBuffer();
     }
@@ -139,8 +139,8 @@ class FixedStraightBytesImpl {
       super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
     }
 
-    public Writer(Directory dir, String id, String codecName, int version, Counter bytesUsed, IOContext context) throws IOException {
-      super(dir, id, codecName, version, bytesUsed, context);
+    public Writer(Directory dir, String id, String codecNameDat, int version, Counter bytesUsed, IOContext context) throws IOException {
+      super(dir, id, codecNameDat, version, bytesUsed, context);
     }
 
@@ -268,8 +268,8 @@ class FixedStraightBytesImpl {
       this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, Type.BYTES_FIXED_STRAIGHT);
     }
 
-    protected FixedStraightReader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context, Type type) throws IOException {
-      super(dir, id, codec, version, false, context, type);
+    protected FixedStraightReader(Directory dir, String id, String codecNameDat, int version, int maxDoc, IOContext context, Type type) throws IOException {
+      super(dir, id, null, codecNameDat, version, false, context, type);
       size = datIn.readInt();
       this.maxDoc = maxDoc;
     }
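Note the null passed for codecNameIdx in the straight variants above: straight (non-dereferenced) fixed-width values are data-only, so FixedStraightReader even passes doIndex=false and never opens an .idx stream, which is exactly the one-name-null case the new assertions in BytesWriterBase permit. A sketch of the convention for a hypothetical data-only subclass:

    // Hypothetical BytesWriterBase subclass: no index file, data codec name only.
    public DemoStraightWriter(Directory dir, String id, Counter bytesUsed, IOContext context)
        throws IOException {
      super(dir, id, /* codecNameIdx = */ null, /* codecNameDat = */ "DemoStraightDat",
          VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
    }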
VarDerefBytesImpl.java

@@ -41,7 +41,9 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 class VarDerefBytesImpl {
 
-  static final String CODEC_NAME = "VarDerefBytes";
+  static final String CODEC_NAME_IDX = "VarDerefBytesIdx";
+  static final String CODEC_NAME_DAT = "VarDerefBytesDat";
+
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
 
@@ -57,7 +59,7 @@ class VarDerefBytesImpl {
   static class Writer extends DerefBytesWriterBase {
     public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
         throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_DEREF);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_DEREF);
       size = 0;
     }
 
@@ -93,7 +95,7 @@ class VarDerefBytesImpl {
   public static class VarDerefReader extends BytesReaderBase {
     private final long totalBytes;
     VarDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_START, true, context, Type.BYTES_VAR_DEREF);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, Type.BYTES_VAR_DEREF);
       totalBytes = idxIn.readLong();
     }
 
VarSortedBytesImpl.java

@@ -50,7 +50,9 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 final class VarSortedBytesImpl {
 
-  static final String CODEC_NAME = "VarDerefBytes";
+  static final String CODEC_NAME_IDX = "VarDerefBytesIdx";
+  static final String CODEC_NAME_DAT = "VarDerefBytesDat";
+
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
 
@@ -59,7 +61,7 @@ final class VarSortedBytesImpl {
 
     public Writer(Directory dir, String id, Comparator<BytesRef> comp,
         Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
       this.comp = comp;
       size = 0;
     }
@@ -154,7 +156,7 @@ final class VarSortedBytesImpl {
     Reader(Directory dir, String id, int maxDoc,
         IOContext context, Type type, Comparator<BytesRef> comparator)
         throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, type);
       this.comparator = comparator;
     }
 
VarStraightBytesImpl.java

@@ -50,7 +50,9 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 class VarStraightBytesImpl {
 
-  static final String CODEC_NAME = "VarStraightBytes";
+  static final String CODEC_NAME_IDX = "VarStraightBytesIdx";
+  static final String CODEC_NAME_DAT = "VarStraightBytesDat";
+
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
 
@@ -64,7 +66,7 @@ class VarStraightBytesImpl {
     private boolean merge = false;
     public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
         throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_STRAIGHT);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_STRAIGHT);
       pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
       docToAddress = new long[1];
       pool.nextBuffer(); // init
@@ -236,7 +238,7 @@ class VarStraightBytesImpl {
     final int maxDoc;
 
     VarStraightReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_START, true, context, Type.BYTES_VAR_STRAIGHT);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, Type.BYTES_VAR_STRAIGHT);
       this.maxDoc = maxDoc;
     }
 