mirror of
https://github.com/apache/lucene.git
synced 2025-02-08 19:15:06 +00:00
additional 4.0 checks, javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1438595 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f7db724186
commit
86e30c7f7f
@ -19,17 +19,113 @@ package org.apache.lucene.codecs.lucene40;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.store.CompoundFileDirectory;
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lucene 4.0 DocValues format.
|
||||||
|
* <p>
|
||||||
|
* Files:
|
||||||
|
* <ul>
|
||||||
|
* <li><tt>.dv.cfs</tt>: {@link CompoundFileDirectory compound container}</li>
|
||||||
|
* <li><tt>.dv.cfe</tt>: {@link CompoundFileDirectory compound entries}</li>
|
||||||
|
* </ul>
|
||||||
|
* Entries within the compound file:
|
||||||
|
* <ul>
|
||||||
|
* <li><tt>&lt;segment&gt;_&lt;fieldNumber&gt;.dat</tt>: data values</li>
|
||||||
|
* <li><tt>&lt;segment&gt;_&lt;fieldNumber&gt;.idx</tt>: index into the .dat for DEREF types</li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* There are several types of {@code DocValues} with different encodings.
|
||||||
|
* From the perspective of filenames, all types store their values in <tt>.dat</tt>
|
||||||
|
* entries within the compound file. In the case of dereferenced/sorted types, the <tt>.dat</tt>
|
||||||
|
* actually contains only the unique values, and an additional <tt>.idx</tt> file contains
|
||||||
|
* pointers to these unique values.
|
||||||
|
* </p>
|
||||||
|
* Formats:
|
||||||
|
* <ul>
|
||||||
|
* <li>{@code VAR_INTS} .dat --> Header, PackedType, MinValue,
|
||||||
|
* DefaultValue, PackedStream</li>
|
||||||
|
* <li>{@code FIXED_INTS_8} .dat --> Header, ValueSize,
|
||||||
|
* {@link DataOutput#writeByte Byte}<sup>maxdoc</sup></li>
|
||||||
|
* <li>{@code FIXED_INTS_16} .dat --> Header, ValueSize,
|
||||||
|
* {@link DataOutput#writeShort Short}<sup>maxdoc</sup></li>
|
||||||
|
* <li>{@code FIXED_INTS_32} .dat --> Header, ValueSize,
|
||||||
|
* {@link DataOutput#writeInt Int32}<sup>maxdoc</sup></li>
|
||||||
|
* <li>{@code FIXED_INTS_64} .dat --> Header, ValueSize,
|
||||||
|
* {@link DataOutput#writeLong Int64}<sup>maxdoc</sup></li>
|
||||||
|
* <li>{@code FLOAT_32} .dat --> Header, ValueSize, Float32<sup>maxdoc</sup></li>
|
||||||
|
* <li>{@code FLOAT_64} .dat --> Header, ValueSize, Float64<sup>maxdoc</sup></li>
|
||||||
|
* <li>{@code BYTES_FIXED_STRAIGHT} .dat --> Header, ValueSize,
|
||||||
|
* ({@link DataOutput#writeByte Byte} * ValueSize)<sup>maxdoc</sup></li>
|
||||||
|
* <li>{@code BYTES_VAR_STRAIGHT} .idx --> Header, TotalBytes, Addresses</li>
|
||||||
|
* <li>{@code BYTES_VAR_STRAIGHT} .dat --> Header,
|
||||||
|
* ({@link DataOutput#writeByte Byte} * <i>variable ValueSize</i>)<sup>maxdoc</sup></li>
|
||||||
|
* <li>{@code BYTES_FIXED_DEREF} .idx --> Header, NumValues, Addresses</li>
|
||||||
|
* <li>{@code BYTES_FIXED_DEREF} .dat --> Header, ValueSize,
|
||||||
|
* ({@link DataOutput#writeByte Byte} * ValueSize)<sup>NumValues</sup></li>
|
||||||
|
* <li>{@code BYTES_VAR_DEREF} .idx --> Header, TotalVarBytes, Addresses</li>
|
||||||
|
* <li>{@code BYTES_VAR_DEREF} .dat --> Header,
|
||||||
|
* (LengthPrefix + {@link DataOutput#writeByte Byte} * <i>variable ValueSize</i>)<sup>NumValues</sup></li>
|
||||||
|
* <li>{@code BYTES_FIXED_SORTED} .idx --> Header, NumValues, Ordinals</li>
|
||||||
|
* <li>{@code BYTES_FIXED_SORTED} .dat --> Header, ValueSize,
|
||||||
|
* ({@link DataOutput#writeByte Byte} * ValueSize)<sup>NumValues</sup></li>
|
||||||
|
* <li>{@code BYTES_VAR_SORTED} .idx --> Header, TotalVarBytes, Addresses, Ordinals</li>
|
||||||
|
* <li>{@code BYTES_VAR_SORTED} .dat --> Header,
|
||||||
|
* ({@link DataOutput#writeByte Byte} * <i>variable ValueSize</i>)<sup>NumValues</sup></li>
|
||||||
|
* </ul>
|
||||||
|
* Data Types:
|
||||||
|
* <ul>
|
||||||
|
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
|
||||||
|
* <li>PackedType --> {@link DataOutput#writeByte Byte}</li>
|
||||||
|
* <li>MaxAddress, MinValue, DefaultValue --> {@link DataOutput#writeLong Int64}</li>
|
||||||
|
* <li>PackedStream, Addresses, Ordinals --> {@link PackedInts}</li>
|
||||||
|
* <li>ValueSize, NumValues --> {@link DataOutput#writeInt Int32}</li>
|
||||||
|
* <li>Float32 --> 32-bit float encoded with {@link Float#floatToRawIntBits(float)}
|
||||||
|
* then written as {@link DataOutput#writeInt Int32}</li>
|
||||||
|
* <li>Float64 --> 64-bit float encoded with {@link Double#doubleToRawLongBits(double)}
|
||||||
|
* then written as {@link DataOutput#writeLong Int64}</li>
|
||||||
|
* <li>TotalBytes --> {@link DataOutput#writeVLong VLong}</li>
|
||||||
|
* <li>TotalVarBytes --> {@link DataOutput#writeLong Int64}</li>
|
||||||
|
* <li>LengthPrefix --> Length of the data value as {@link DataOutput#writeVInt VInt} (maximum
|
||||||
|
* of 2 bytes)</li>
|
||||||
|
* </ul>
|
||||||
|
* Notes:
|
||||||
|
* <ul>
|
||||||
|
* <li>PackedType is a 0 when compressed, 1 when the stream is written as 64-bit integers.</li>
|
||||||
|
* <li>Addresses stores pointers to the actual byte location (indexed by docid). In the VAR_STRAIGHT
|
||||||
|
* case, each entry can have a different length, so to determine the length, docid+1 is
|
||||||
|
* retrieved. A sentinel address is written at the end for the VAR_STRAIGHT case, so the Addresses
|
||||||
|
* stream contains maxdoc+1 indices. For the deduplicated VAR_DEREF case, each length
|
||||||
|
* is encoded as a prefix to the data itself as a {@link DataOutput#writeVInt VInt}
|
||||||
|
* (maximum of 2 bytes).</li>
|
||||||
|
* <li>Ordinals stores the term ID in sorted order (indexed by docid). In the FIXED_SORTED case,
|
||||||
|
* the address into the .dat can be computed from the ordinal as
|
||||||
|
* <code>Header+ValueSize+(ordinal*ValueSize)</code> because the byte length is fixed.
|
||||||
|
* In the VAR_SORTED case, there is double indirection (docid -> ordinal -> address), but
|
||||||
|
* an additional sentinel ordinal+address is always written (so there are NumValues+1 ordinals). To
|
||||||
|
* determine the length, ord+1's address is looked up as well.</li>
|
||||||
|
* <li>{@code BYTES_VAR_STRAIGHT} in contrast to other straight
|
||||||
|
* variants uses a <tt>.idx</tt> file to improve lookup performance. In contrast to
|
||||||
|
* {@code BYTES_VAR_DEREF} it doesn't apply deduplication of the document values.
|
||||||
|
* </li>
|
||||||
|
* </ul>
|
||||||
|
* @deprecated Only for reading old 4.0 and 4.1 segments
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
// NOTE: not registered in SPI, doesn't respect segment suffix, etc
|
// NOTE: not registered in SPI, doesn't respect segment suffix, etc
|
||||||
// for back compat only!
|
// for back compat only!
|
||||||
public class Lucene40DocValuesFormat extends DocValuesFormat {
|
public class Lucene40DocValuesFormat extends DocValuesFormat {
|
||||||
|
|
||||||
|
/** Sole constructor. */
|
||||||
public Lucene40DocValuesFormat() {
|
public Lucene40DocValuesFormat() {
|
||||||
super("Lucene40");
|
super("Lucene40");
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,12 @@ import org.apache.lucene.util.BytesRef;
|
|||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads the 4.0 format of norms/docvalues
|
||||||
|
* @lucene.experimental
|
||||||
|
* @deprecated Only for reading old 4.0 and 4.1 segments
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
class Lucene40DocValuesReader extends DocValuesProducer {
|
class Lucene40DocValuesReader extends DocValuesProducer {
|
||||||
private final Directory dir;
|
private final Directory dir;
|
||||||
private final SegmentReadState state;
|
private final SegmentReadState state;
|
||||||
@ -56,24 +62,6 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
this.state = state;
|
this.state = state;
|
||||||
this.legacyKey = legacyKey;
|
this.legacyKey = legacyKey;
|
||||||
this.dir = new CompoundFileDirectory(state.directory, filename, state.context, false);
|
this.dir = new CompoundFileDirectory(state.directory, filename, state.context, false);
|
||||||
// nocommit: uncomment to debug
|
|
||||||
/*
|
|
||||||
if (legacyKey.equals(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY)) {
|
|
||||||
System.out.println("dv READER:");
|
|
||||||
for (FieldInfo fi : state.fieldInfos) {
|
|
||||||
if (fi.hasDocValues()) {
|
|
||||||
System.out.println(fi.name + " -> " + fi.getAttribute(legacyKey) + " -> " + fi.getDocValuesType());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
System.out.println("nrm READER:");
|
|
||||||
for (FieldInfo fi : state.fieldInfos) {
|
|
||||||
if (fi.hasNorms()) {
|
|
||||||
System.out.println(fi.name + " -> " + fi.getAttribute(legacyKey) + " -> " + fi.getNormType());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -109,6 +97,9 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
default:
|
default:
|
||||||
throw new AssertionError();
|
throw new AssertionError();
|
||||||
}
|
}
|
||||||
|
if (input.getFilePointer() != input.length()) {
|
||||||
|
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
|
||||||
|
}
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (success) {
|
if (success) {
|
||||||
@ -163,7 +154,10 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
|
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
|
||||||
Lucene40DocValuesFormat.INTS_VERSION_START,
|
Lucene40DocValuesFormat.INTS_VERSION_START,
|
||||||
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
|
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
|
||||||
input.readInt();
|
int valueSize = input.readInt();
|
||||||
|
if (valueSize != 1) {
|
||||||
|
throw new CorruptIndexException("invalid valueSize: " + valueSize);
|
||||||
|
}
|
||||||
int maxDoc = state.segmentInfo.getDocCount();
|
int maxDoc = state.segmentInfo.getDocCount();
|
||||||
final byte values[] = new byte[maxDoc];
|
final byte values[] = new byte[maxDoc];
|
||||||
input.readBytes(values, 0, values.length);
|
input.readBytes(values, 0, values.length);
|
||||||
@ -179,7 +173,10 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
|
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
|
||||||
Lucene40DocValuesFormat.INTS_VERSION_START,
|
Lucene40DocValuesFormat.INTS_VERSION_START,
|
||||||
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
|
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
|
||||||
input.readInt();
|
int valueSize = input.readInt();
|
||||||
|
if (valueSize != 2) {
|
||||||
|
throw new CorruptIndexException("invalid valueSize: " + valueSize);
|
||||||
|
}
|
||||||
int maxDoc = state.segmentInfo.getDocCount();
|
int maxDoc = state.segmentInfo.getDocCount();
|
||||||
final short values[] = new short[maxDoc];
|
final short values[] = new short[maxDoc];
|
||||||
for (int i = 0; i < values.length; i++) {
|
for (int i = 0; i < values.length; i++) {
|
||||||
@ -197,7 +194,10 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
|
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
|
||||||
Lucene40DocValuesFormat.INTS_VERSION_START,
|
Lucene40DocValuesFormat.INTS_VERSION_START,
|
||||||
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
|
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
|
||||||
input.readInt();
|
int valueSize = input.readInt();
|
||||||
|
if (valueSize != 4) {
|
||||||
|
throw new CorruptIndexException("invalid valueSize: " + valueSize);
|
||||||
|
}
|
||||||
int maxDoc = state.segmentInfo.getDocCount();
|
int maxDoc = state.segmentInfo.getDocCount();
|
||||||
final int values[] = new int[maxDoc];
|
final int values[] = new int[maxDoc];
|
||||||
for (int i = 0; i < values.length; i++) {
|
for (int i = 0; i < values.length; i++) {
|
||||||
@ -215,7 +215,10 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
|
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.INTS_CODEC_NAME,
|
||||||
Lucene40DocValuesFormat.INTS_VERSION_START,
|
Lucene40DocValuesFormat.INTS_VERSION_START,
|
||||||
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
|
Lucene40DocValuesFormat.INTS_VERSION_CURRENT);
|
||||||
input.readInt();
|
int valueSize = input.readInt();
|
||||||
|
if (valueSize != 8) {
|
||||||
|
throw new CorruptIndexException("invalid valueSize: " + valueSize);
|
||||||
|
}
|
||||||
int maxDoc = state.segmentInfo.getDocCount();
|
int maxDoc = state.segmentInfo.getDocCount();
|
||||||
final long values[] = new long[maxDoc];
|
final long values[] = new long[maxDoc];
|
||||||
for (int i = 0; i < values.length; i++) {
|
for (int i = 0; i < values.length; i++) {
|
||||||
@ -233,7 +236,10 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME,
|
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME,
|
||||||
Lucene40DocValuesFormat.FLOATS_VERSION_START,
|
Lucene40DocValuesFormat.FLOATS_VERSION_START,
|
||||||
Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);
|
Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);
|
||||||
input.readInt();
|
int valueSize = input.readInt();
|
||||||
|
if (valueSize != 4) {
|
||||||
|
throw new CorruptIndexException("invalid valueSize: " + valueSize);
|
||||||
|
}
|
||||||
int maxDoc = state.segmentInfo.getDocCount();
|
int maxDoc = state.segmentInfo.getDocCount();
|
||||||
final int values[] = new int[maxDoc];
|
final int values[] = new int[maxDoc];
|
||||||
for (int i = 0; i < values.length; i++) {
|
for (int i = 0; i < values.length; i++) {
|
||||||
@ -251,7 +257,10 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME,
|
CodecUtil.checkHeader(input, Lucene40DocValuesFormat.FLOATS_CODEC_NAME,
|
||||||
Lucene40DocValuesFormat.FLOATS_VERSION_START,
|
Lucene40DocValuesFormat.FLOATS_VERSION_START,
|
||||||
Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);
|
Lucene40DocValuesFormat.FLOATS_VERSION_CURRENT);
|
||||||
input.readInt();
|
int valueSize = input.readInt();
|
||||||
|
if (valueSize != 8) {
|
||||||
|
throw new CorruptIndexException("invalid valueSize: " + valueSize);
|
||||||
|
}
|
||||||
int maxDoc = state.segmentInfo.getDocCount();
|
int maxDoc = state.segmentInfo.getDocCount();
|
||||||
final long values[] = new long[maxDoc];
|
final long values[] = new long[maxDoc];
|
||||||
for (int i = 0; i < values.length; i++) {
|
for (int i = 0; i < values.length; i++) {
|
||||||
@ -302,6 +311,9 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
// nocommit? can the current impl even handle > 2G?
|
// nocommit? can the current impl even handle > 2G?
|
||||||
final byte bytes[] = new byte[state.segmentInfo.getDocCount() * fixedLength];
|
final byte bytes[] = new byte[state.segmentInfo.getDocCount() * fixedLength];
|
||||||
input.readBytes(bytes, 0, bytes.length);
|
input.readBytes(bytes, 0, bytes.length);
|
||||||
|
if (input.getFilePointer() != input.length()) {
|
||||||
|
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
|
||||||
|
}
|
||||||
success = true;
|
success = true;
|
||||||
return new BinaryDocValues() {
|
return new BinaryDocValues() {
|
||||||
@Override
|
@Override
|
||||||
@ -340,6 +352,12 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
final byte bytes[] = new byte[(int)totalBytes];
|
final byte bytes[] = new byte[(int)totalBytes];
|
||||||
data.readBytes(bytes, 0, bytes.length);
|
data.readBytes(bytes, 0, bytes.length);
|
||||||
final PackedInts.Reader reader = PackedInts.getReader(index);
|
final PackedInts.Reader reader = PackedInts.getReader(index);
|
||||||
|
if (data.getFilePointer() != data.length()) {
|
||||||
|
throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
|
||||||
|
}
|
||||||
|
if (index.getFilePointer() != index.length()) {
|
||||||
|
throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
|
||||||
|
}
|
||||||
success = true;
|
success = true;
|
||||||
return new BinaryDocValues() {
|
return new BinaryDocValues() {
|
||||||
@Override
|
@Override
|
||||||
@ -382,6 +400,12 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
final byte bytes[] = new byte[fixedLength * valueCount];
|
final byte bytes[] = new byte[fixedLength * valueCount];
|
||||||
data.readBytes(bytes, 0, bytes.length);
|
data.readBytes(bytes, 0, bytes.length);
|
||||||
final PackedInts.Reader reader = PackedInts.getReader(index);
|
final PackedInts.Reader reader = PackedInts.getReader(index);
|
||||||
|
if (data.getFilePointer() != data.length()) {
|
||||||
|
throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
|
||||||
|
}
|
||||||
|
if (index.getFilePointer() != index.length()) {
|
||||||
|
throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
|
||||||
|
}
|
||||||
success = true;
|
success = true;
|
||||||
return new BinaryDocValues() {
|
return new BinaryDocValues() {
|
||||||
@Override
|
@Override
|
||||||
@ -422,6 +446,12 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
final byte bytes[] = new byte[(int)totalBytes];
|
final byte bytes[] = new byte[(int)totalBytes];
|
||||||
data.readBytes(bytes, 0, bytes.length);
|
data.readBytes(bytes, 0, bytes.length);
|
||||||
final PackedInts.Reader reader = PackedInts.getReader(index);
|
final PackedInts.Reader reader = PackedInts.getReader(index);
|
||||||
|
if (data.getFilePointer() != data.length()) {
|
||||||
|
throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
|
||||||
|
}
|
||||||
|
if (index.getFilePointer() != index.length()) {
|
||||||
|
throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
|
||||||
|
}
|
||||||
success = true;
|
success = true;
|
||||||
return new BinaryDocValues() {
|
return new BinaryDocValues() {
|
||||||
@Override
|
@Override
|
||||||
@ -470,6 +500,12 @@ class Lucene40DocValuesReader extends DocValuesProducer {
|
|||||||
default:
|
default:
|
||||||
throw new AssertionError();
|
throw new AssertionError();
|
||||||
}
|
}
|
||||||
|
if (data.getFilePointer() != data.length()) {
|
||||||
|
throw new CorruptIndexException("did not read all bytes from file \"" + dataName + "\": read " + data.getFilePointer() + " vs size " + data.length() + " (resource: " + data + ")");
|
||||||
|
}
|
||||||
|
if (index.getFilePointer() != index.length()) {
|
||||||
|
throw new CorruptIndexException("did not read all bytes from file \"" + indexName + "\": read " + index.getFilePointer() + " vs size " + index.length() + " (resource: " + index + ")");
|
||||||
|
}
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (success) {
|
if (success) {
|
||||||
|
@ -25,9 +25,29 @@ import org.apache.lucene.codecs.NormsFormat;
|
|||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.store.CompoundFileDirectory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lucene 4.0 Norms Format.
|
||||||
|
* <p>
|
||||||
|
* Files:
|
||||||
|
* <ul>
|
||||||
|
* <li><tt>.nrm.cfs</tt>: {@link CompoundFileDirectory compound container}</li>
|
||||||
|
* <li><tt>.nrm.cfe</tt>: {@link CompoundFileDirectory compound entries}</li>
|
||||||
|
* </ul>
|
||||||
|
* Norms are implemented as DocValues, so other than file extension, norms are
|
||||||
|
* written exactly the same way as {@link Lucene40DocValuesFormat DocValues}.
|
||||||
|
*
|
||||||
|
* @see Lucene40DocValuesFormat
|
||||||
|
* @lucene.experimental
|
||||||
|
* @deprecated Only for reading old 4.0 and 4.1 segments
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
public class Lucene40NormsFormat extends NormsFormat {
|
public class Lucene40NormsFormat extends NormsFormat {
|
||||||
|
|
||||||
|
/** Sole constructor. */
|
||||||
|
public Lucene40NormsFormat() {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
|
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
|
||||||
throw new UnsupportedOperationException("this codec can only be used for reading");
|
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||||
|
@ -81,9 +81,26 @@ public final class FieldInfo {
|
|||||||
DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
|
DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DocValues types.
|
||||||
|
* Note that DocValues is strongly typed, so a field cannot have different types
|
||||||
|
* across different documents.
|
||||||
|
*/
|
||||||
public static enum DocValuesType {
|
public static enum DocValuesType {
|
||||||
|
/**
|
||||||
|
* A per-document Number.
|
||||||
|
*/
|
||||||
NUMERIC,
|
NUMERIC,
|
||||||
|
/**
|
||||||
|
* A per-document byte[].
|
||||||
|
*/
|
||||||
BINARY,
|
BINARY,
|
||||||
|
/**
|
||||||
|
* A pre-sorted byte[]. Fields with this type only store distinct byte values
|
||||||
|
* and store an additional offset pointer per document to dereference the shared
|
||||||
|
* byte[]. The stored byte[] is presorted and allows access via document id,
|
||||||
|
* ordinal and by-value.
|
||||||
|
*/
|
||||||
SORTED
|
SORTED
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -31,9 +31,24 @@ import org.apache.lucene.util.BytesRef;
|
|||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A wrapper for CompositeIndexReader providing access to DocValues.
|
||||||
|
*
|
||||||
|
* <p><b>NOTE</b>: for multi readers, you'll get better
|
||||||
|
* performance by gathering the sub readers using
|
||||||
|
* {@link IndexReader#getContext()} to get the
|
||||||
|
* atomic leaves and then operate per-AtomicReader,
|
||||||
|
* instead of using this class.
|
||||||
|
*
|
||||||
|
* <p><b>NOTE</b>: This is very costly.
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
// nocommit move this back to test-framework!!!
|
// nocommit move this back to test-framework!!!
|
||||||
public class MultiDocValues {
|
public class MultiDocValues {
|
||||||
|
|
||||||
|
/** returns a NumericDocValues for a reader's norms (potentially merging on-the-fly) */
|
||||||
// moved to src/java so SlowWrapper can use it... uggggggh
|
// moved to src/java so SlowWrapper can use it... uggggggh
|
||||||
public static NumericDocValues getNormValues(final IndexReader r, final String field) throws IOException {
|
public static NumericDocValues getNormValues(final IndexReader r, final String field) throws IOException {
|
||||||
final List<AtomicReaderContext> leaves = r.leaves();
|
final List<AtomicReaderContext> leaves = r.leaves();
|
||||||
@ -74,6 +89,7 @@ public class MultiDocValues {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** returns a NumericDocValues for a reader's docvalues (potentially merging on-the-fly) */
|
||||||
public static NumericDocValues getNumericValues(final IndexReader r, final String field) throws IOException {
|
public static NumericDocValues getNumericValues(final IndexReader r, final String field) throws IOException {
|
||||||
final List<AtomicReaderContext> leaves = r.leaves();
|
final List<AtomicReaderContext> leaves = r.leaves();
|
||||||
if (leaves.size() == 1) {
|
if (leaves.size() == 1) {
|
||||||
@ -111,6 +127,7 @@ public class MultiDocValues {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** returns a BinaryDocValues for a reader's docvalues (potentially merging on-the-fly) */
|
||||||
public static BinaryDocValues getBinaryValues(final IndexReader r, final String field) throws IOException {
|
public static BinaryDocValues getBinaryValues(final IndexReader r, final String field) throws IOException {
|
||||||
final List<AtomicReaderContext> leaves = r.leaves();
|
final List<AtomicReaderContext> leaves = r.leaves();
|
||||||
if (leaves.size() == 1) {
|
if (leaves.size() == 1) {
|
||||||
@ -152,6 +169,7 @@ public class MultiDocValues {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** returns a SortedDocValues for a reader's docvalues (potentially doing extremely slow things) */
|
||||||
public static SortedDocValues getSortedValues(final IndexReader r, final String field) throws IOException {
|
public static SortedDocValues getSortedValues(final IndexReader r, final String field) throws IOException {
|
||||||
final List<AtomicReaderContext> leaves = r.leaves();
|
final List<AtomicReaderContext> leaves = r.leaves();
|
||||||
if (leaves.size() == 1) {
|
if (leaves.size() == 1) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user