mirror of https://github.com/apache/lucene.git
javadocs/cleanups
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5178@1515563 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e925901014
commit
c2f37e0de2
|
@ -75,6 +75,7 @@ public abstract class DocValuesProducer implements Closeable {
|
||||||
final SortedDocValues in;
|
final SortedDocValues in;
|
||||||
final int maxDoc;
|
final int maxDoc;
|
||||||
|
|
||||||
|
/** Creates a {@link Bits} returning true if the document has a value */
|
||||||
public SortedDocsWithField(SortedDocValues in, int maxDoc) {
|
public SortedDocsWithField(SortedDocValues in, int maxDoc) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.maxDoc = maxDoc;
|
this.maxDoc = maxDoc;
|
||||||
|
@ -102,6 +103,7 @@ public abstract class DocValuesProducer implements Closeable {
|
||||||
final SortedSetDocValues in;
|
final SortedSetDocValues in;
|
||||||
final int maxDoc;
|
final int maxDoc;
|
||||||
|
|
||||||
|
/** Creates a {@link Bits} returning true if the document has a value */
|
||||||
public SortedSetDocsWithField(SortedSetDocValues in, int maxDoc) {
|
public SortedSetDocsWithField(SortedSetDocValues in, int maxDoc) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.maxDoc = maxDoc;
|
this.maxDoc = maxDoc;
|
||||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.codecs.lucene45;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.Closeable; // javadocs
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
@ -37,7 +38,7 @@ import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
/** writer for {@link Lucene45DocValuesFormat} */
|
/** writer for {@link Lucene45DocValuesFormat} */
|
||||||
public class Lucene45DocValuesConsumer extends DocValuesConsumer {
|
public class Lucene45DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||||
|
|
||||||
static final int BLOCK_SIZE = 16384;
|
static final int BLOCK_SIZE = 16384;
|
||||||
static final int ADDRESS_INTERVAL = 16;
|
static final int ADDRESS_INTERVAL = 16;
|
||||||
|
@ -59,6 +60,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer {
|
||||||
final IndexOutput data, meta;
|
final IndexOutput data, meta;
|
||||||
final int maxDoc;
|
final int maxDoc;
|
||||||
|
|
||||||
|
/** expert: Creates a new writer */
|
||||||
public Lucene45DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
|
public Lucene45DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
|
@ -273,6 +275,7 @@ public class Lucene45DocValuesConsumer extends DocValuesConsumer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** expert: writes a value dictionary for a sorted/sortedset field */
|
||||||
protected void addTermsDict(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
|
protected void addTermsDict(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
|
||||||
// first check if it's a "fixed-length" terms dict
|
// first check if it's a "fixed-length" terms dict
|
||||||
int minLength = Integer.MAX_VALUE;
|
int minLength = Integer.MAX_VALUE;
|
||||||
|
|
|
@ -60,19 +60,23 @@ import org.apache.lucene.util.packed.PackedInts;
|
||||||
* for each document. The addresses are written in blocks of 16k, with the current absolute
|
* for each document. The addresses are written in blocks of 16k, with the current absolute
|
||||||
* start for the block, and the average (expected) delta per entry. For each document the
|
* start for the block, and the average (expected) delta per entry. For each document the
|
||||||
* deviation from the delta (actual - expected) is written.
|
* deviation from the delta (actual - expected) is written.
|
||||||
* <li>Prefix-compressed Binary: nocommit
|
* <li>Prefix-compressed Binary: values are written in chunks of 16, with the first value written
|
||||||
|
* completely and other values sharing prefixes. chunk addresses are written in blocks of 16k,
|
||||||
|
* with the current absolute start for the block, and the average (expected) delta per entry.
|
||||||
|
* For each chunk the deviation from the delta (actual - expected) is written.
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>
|
* <p>
|
||||||
* {@link DocValuesType#SORTED SORTED}:
|
* {@link DocValuesType#SORTED SORTED}:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>Sorted: an FST mapping deduplicated terms to ordinals is written, along with the per-document
|
* <li>Sorted: a mapping of ordinals to deduplicated terms is written as Prefix-Compressed Binary,
|
||||||
* ordinals written using one of the numeric strategies above.
|
* along with the per-document ordinals written using one of the numeric strategies above.
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>
|
* <p>
|
||||||
* {@link DocValuesType#SORTED_SET SORTED_SET}:
|
* {@link DocValuesType#SORTED_SET SORTED_SET}:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>SortedSet: an FST mapping deduplicated terms to ordinals is written, along with the per-document
|
* <li>SortedSet: a mapping of ordinals to deduplicated terms is written as Prefix-Compressed Binary,
|
||||||
* ordinal list written using one of the binary strategies above.
|
* an ordinal list and per-document index into this list are written using the numeric strategies
|
||||||
|
* above.
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>
|
* <p>
|
||||||
* Files:
|
* Files:
|
||||||
|
@ -85,25 +89,35 @@ import org.apache.lucene.util.packed.PackedInts;
|
||||||
* <p>The DocValues metadata or .dvm file.</p>
|
* <p>The DocValues metadata or .dvm file.</p>
|
||||||
* <p>For DocValues field, this stores metadata, such as the offset into the
|
* <p>For DocValues field, this stores metadata, such as the offset into the
|
||||||
* DocValues data (.dvd)</p>
|
* DocValues data (.dvd)</p>
|
||||||
* <p>DocValues metadata (.dvm) --> Header,<FieldNumber,EntryType,Entry><sup>NumFields</sup></p>
|
* <p>DocValues metadata (.dvm) --> Header,<Entry><sup>NumFields</sup></p>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>Entry --> NumericEntry | BinaryEntry | SortedEntry</li>
|
* <li>Entry --> NumericEntry | BinaryEntry | SortedEntry | SortedSetEntry</li>
|
||||||
* <li>NumericEntry --> DataOffset,NumericCompressionType,PackedVersion</li>
|
* <li>NumericEntry --> GCDNumericEntry | TableNumericEntry | DeltaNumericEntry</li>
|
||||||
* <li>BinaryEntry --> DataOffset,DataLength,MinLength,MaxLength,PackedVersion?,BlockSize?</li>
|
* <li>GCDNumericEntry --> NumericHeader,MinValue,GCD</li>
|
||||||
* <li>SortedEntry --> DataOffset,ValueCount</li>
|
* <li>TableNumericEntry --> NumericHeader,TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup></li>
|
||||||
|
* <li>DeltaNumericEntry --> NumericHeader</li>
|
||||||
|
* <li>NumericHeader --> FieldNumber,EntryType,NumericType,MissingOffset,PackedVersion,DataOffset,Count,BlockSize</li>
|
||||||
|
* <li>BinaryEntry --> FixedBinaryEntry | VariableBinaryEntry | PrefixBinaryEntry</li>
|
||||||
|
* <li>FixedBinaryEntry --> BinaryHeader</li>
|
||||||
|
* <li>VariableBinaryEntry --> BinaryHeader,AddressOffset,PackedVersion,BlockSize</li>
|
||||||
|
* <li>PrefixBinaryEntry --> BinaryHeader,AddressInterval,AddressOffset,PackedVersion,BlockSize</li>
|
||||||
|
* <li>BinaryHeader --> FieldNumber,EntryType,BinaryType,MissingOffset,MinLength,MaxLength,DataOffset</li>
|
||||||
|
* <li>SortedEntry --> FieldNumber,EntryType,BinaryEntry,NumericEntry</li>
|
||||||
|
* <li>SortedSetEntry --> EntryType,BinaryEntry,NumericEntry,NumericEntry</li>
|
||||||
* <li>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --> {@link DataOutput#writeVInt VInt}</li>
|
* <li>FieldNumber,PackedVersion,MinLength,MaxLength,BlockSize,ValueCount --> {@link DataOutput#writeVInt VInt}</li>
|
||||||
* <li>DataOffset,DataLength --> {@link DataOutput#writeLong Int64}</li>
|
|
||||||
* <li>EntryType,CompressionType --> {@link DataOutput#writeByte Byte}</li>
|
* <li>EntryType,CompressionType --> {@link DataOutput#writeByte Byte}</li>
|
||||||
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
|
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
|
||||||
|
* <li>MinValue,GCD,MissingOffset,AddressOffset,DataOffset --> {@link DataOutput#writeLong Int64}</li>
|
||||||
|
* <li>TableSize --> {@link DataOutput#writeVInt vInt}</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>Sorted fields have two entries: a SortedEntry with the FST metadata,
|
* <p>Sorted fields have two entries: a BinaryEntry with the value metadata,
|
||||||
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
|
* and an ordinary NumericEntry for the document-to-ord metadata.</p>
|
||||||
* <p>SortedSet fields have two entries: a SortedEntry with the FST metadata,
|
* <p>SortedSet fields have three entries: a BinaryEntry with the value metadata,
|
||||||
* and an ordinary BinaryEntry for the document-to-ord-list metadata.</p>
|
* and two NumericEntries for the document-to-ord-index and ordinal list metadata.</p>
|
||||||
* <p>FieldNumber of -1 indicates the end of metadata.</p>
|
* <p>FieldNumber of -1 indicates the end of metadata.</p>
|
||||||
* <p>EntryType is a 0 (NumericEntry), 1 (BinaryEntry, or 2 (SortedEntry)</p>
|
* <p>EntryType is 0 (NumericEntry) or 1 (BinaryEntry)</p>
|
||||||
* <p>DataOffset is the pointer to the start of the data in the DocValues data (.dvd)</p>
|
* <p>DataOffset is the pointer to the start of the data in the DocValues data (.dvd)</p>
|
||||||
* <p>NumericCompressionType indicates how Numeric values will be compressed:
|
* <p>NumericType indicates how Numeric values will be compressed:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>0 --> delta-compressed. For each block of 16k integers, every integer is delta-encoded
|
* <li>0 --> delta-compressed. For each block of 16k integers, every integer is delta-encoded
|
||||||
* from the minimum value within the block.
|
* from the minimum value within the block.
|
||||||
|
@ -112,10 +126,18 @@ import org.apache.lucene.util.packed.PackedInts;
|
||||||
* <li>2 --> table-compressed. When the number of unique numeric values is small and it would save space,
|
* <li>2 --> table-compressed. When the number of unique numeric values is small and it would save space,
|
||||||
* a lookup table of unique values is written, followed by the ordinal for each document.
|
* a lookup table of unique values is written, followed by the ordinal for each document.
|
||||||
* </ul>
|
* </ul>
|
||||||
|
* <p>BinaryType indicates how Binary values will be stored:
|
||||||
|
* <ul>
|
||||||
|
* <li>0 --> fixed-width. All values have the same length, addressing by multiplication.
|
||||||
|
* <li>1 --> variable-width. An address for each value is stored.
|
||||||
|
* <li>2 --> prefix-compressed. An address to the start of every interval'th value is stored.
|
||||||
|
* </ul>
|
||||||
* <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
|
* <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
|
||||||
* If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length).
|
* If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length).
|
||||||
* Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize)
|
* Otherwise, the binary values are of variable size, and packed integer metadata (PackedVersion,BlockSize)
|
||||||
* is written for the addresses.
|
* is written for the addresses.
|
||||||
|
* <p>MissingOffset points to a byte[] containing a bitset of all documents that had a value for the field.
|
||||||
|
* If it is -1, then there are no missing values.
|
||||||
* <li><a name="dvd" id="dvd"></a>
|
* <li><a name="dvd" id="dvd"></a>
|
||||||
* <p>The DocValues data or .dvd file.</p>
|
* <p>The DocValues data or .dvd file.</p>
|
||||||
* <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
|
* <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
|
||||||
|
@ -125,21 +147,18 @@ import org.apache.lucene.util.packed.PackedInts;
|
||||||
* <li>BinaryData --> {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
|
* <li>BinaryData --> {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
|
||||||
* <li>SortedData --> {@link FST FST<Int64>}</li>
|
* <li>SortedData --> {@link FST FST<Int64>}</li>
|
||||||
* <li>DeltaCompressedNumerics --> {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
|
* <li>DeltaCompressedNumerics --> {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
|
||||||
* <li>TableCompressedNumerics --> TableSize,{@link DataOutput#writeLong Int64}<sup>TableSize</sup>,{@link PackedInts PackedInts}</li>
|
* <li>TableCompressedNumerics --> {@link PackedInts PackedInts}</li>
|
||||||
* <li>GCDCompressedNumerics --> MinValue,GCD,{@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
|
* <li>GCDCompressedNumerics --> {@link BlockPackedWriter BlockPackedInts(blockSize=16k)}</li>
|
||||||
* <li>Addresses --> {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}</li>
|
* <li>Addresses --> {@link MonotonicBlockPackedWriter MonotonicBlockPackedInts(blockSize=16k)}</li>
|
||||||
* <li>TableSize --> {@link DataOutput#writeVInt vInt}</li>
|
|
||||||
* <li>MinValue --> {@link DataOutput#writeLong Int64}</li>
|
|
||||||
* <li>GCD --> {@link DataOutput#writeLong Int64}</li>
|
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>SortedSet entries store the list of ordinals in their BinaryData as a
|
* <p>SortedSet entries store the list of ordinals in their BinaryData as a
|
||||||
* sequence of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
|
* sequence of increasing {@link DataOutput#writeVLong vLong}s, delta-encoded.</p>
|
||||||
* </ol>
|
* </ol>
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
// nocommit: docs are incomplete
|
|
||||||
public final class Lucene45DocValuesFormat extends DocValuesFormat {
|
public final class Lucene45DocValuesFormat extends DocValuesFormat {
|
||||||
|
|
||||||
|
/** Sole Constructor */
|
||||||
public Lucene45DocValuesFormat() {
|
public Lucene45DocValuesFormat() {
|
||||||
super("Lucene45");
|
super("Lucene45");
|
||||||
}
|
}
|
||||||
|
@ -154,14 +173,14 @@ public final class Lucene45DocValuesFormat extends DocValuesFormat {
|
||||||
return new Lucene45DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
|
return new Lucene45DocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String DATA_CODEC = "Lucene45DocValuesData";
|
static final String DATA_CODEC = "Lucene45DocValuesData";
|
||||||
public static final String DATA_EXTENSION = "dvd";
|
static final String DATA_EXTENSION = "dvd";
|
||||||
public static final String META_CODEC = "Lucene45ValuesMetadata";
|
static final String META_CODEC = "Lucene45ValuesMetadata";
|
||||||
public static final String META_EXTENSION = "dvm";
|
static final String META_EXTENSION = "dvm";
|
||||||
public static final int VERSION_START = 0;
|
static final int VERSION_START = 0;
|
||||||
public static final int VERSION_CURRENT = VERSION_START;
|
static final int VERSION_CURRENT = VERSION_START;
|
||||||
public static final byte NUMERIC = 0;
|
static final byte NUMERIC = 0;
|
||||||
public static final byte BINARY = 1;
|
static final byte BINARY = 1;
|
||||||
public static final byte SORTED = 2;
|
static final byte SORTED = 2;
|
||||||
public static final byte SORTED_SET = 3;
|
static final byte SORTED_SET = 3;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,7 @@ import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY
|
||||||
import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED;
|
import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED;
|
||||||
import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED;
|
import static org.apache.lucene.codecs.lucene45.Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED;
|
||||||
|
|
||||||
|
import java.io.Closeable; // javadocs
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -53,7 +54,8 @@ import org.apache.lucene.util.packed.BlockPackedReader;
|
||||||
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
|
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
public class Lucene45DocValuesProducer extends DocValuesProducer {
|
/** reader for {@link Lucene45DocValuesFormat} */
|
||||||
|
public class Lucene45DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
private final Map<Integer,NumericEntry> numerics;
|
private final Map<Integer,NumericEntry> numerics;
|
||||||
private final Map<Integer,BinaryEntry> binaries;
|
private final Map<Integer,BinaryEntry> binaries;
|
||||||
private final Map<Integer,NumericEntry> ords;
|
private final Map<Integer,NumericEntry> ords;
|
||||||
|
@ -65,6 +67,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
|
||||||
private final Map<Integer,MonotonicBlockPackedReader> addressInstances = new HashMap<Integer,MonotonicBlockPackedReader>();
|
private final Map<Integer,MonotonicBlockPackedReader> addressInstances = new HashMap<Integer,MonotonicBlockPackedReader>();
|
||||||
private final Map<Integer,MonotonicBlockPackedReader> ordIndexInstances = new HashMap<Integer,MonotonicBlockPackedReader>();
|
private final Map<Integer,MonotonicBlockPackedReader> ordIndexInstances = new HashMap<Integer,MonotonicBlockPackedReader>();
|
||||||
|
|
||||||
|
/** expert: instantiates a new reader */
|
||||||
protected Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
|
protected Lucene45DocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
|
||||||
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
|
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
|
||||||
// read in the entries from the metadata file.
|
// read in the entries from the metadata file.
|
||||||
|
@ -317,6 +320,8 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** returns an address instance for variable-length binary values.
|
||||||
|
* @lucene.internal */
|
||||||
protected MonotonicBlockPackedReader getAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
|
protected MonotonicBlockPackedReader getAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
|
||||||
final MonotonicBlockPackedReader addresses;
|
final MonotonicBlockPackedReader addresses;
|
||||||
synchronized (addressInstances) {
|
synchronized (addressInstances) {
|
||||||
|
@ -358,6 +363,8 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** returns an address instance for prefix-compressed binary values.
|
||||||
|
* @lucene.internal */
|
||||||
protected MonotonicBlockPackedReader getIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
|
protected MonotonicBlockPackedReader getIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) throws IOException {
|
||||||
final MonotonicBlockPackedReader addresses;
|
final MonotonicBlockPackedReader addresses;
|
||||||
final long interval = bytes.addressInterval;
|
final long interval = bytes.addressInterval;
|
||||||
|
@ -434,6 +441,8 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** returns an address instance for sortedset ordinal lists
|
||||||
|
* @lucene.internal */
|
||||||
protected MonotonicBlockPackedReader getOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry) throws IOException {
|
protected MonotonicBlockPackedReader getOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry) throws IOException {
|
||||||
final MonotonicBlockPackedReader ordIndex;
|
final MonotonicBlockPackedReader ordIndex;
|
||||||
synchronized (ordIndexInstances) {
|
synchronized (ordIndexInstances) {
|
||||||
|
@ -509,7 +518,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
public Bits getMissingBits(final long offset) throws IOException {
|
private Bits getMissingBits(final long offset) throws IOException {
|
||||||
if (offset == -1) {
|
if (offset == -1) {
|
||||||
return new Bits.MatchAllBits(maxDoc);
|
return new Bits.MatchAllBits(maxDoc);
|
||||||
} else {
|
} else {
|
||||||
|
@ -557,13 +566,20 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
|
||||||
data.close();
|
data.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** metadata entry for a numeric docvalues field */
|
||||||
protected static class NumericEntry {
|
protected static class NumericEntry {
|
||||||
|
private NumericEntry() {}
|
||||||
|
/** offset to the bitset representing docsWithField, or -1 if no documents have missing values */
|
||||||
long missingOffset;
|
long missingOffset;
|
||||||
|
/** offset to the actual numeric values */
|
||||||
public long offset;
|
public long offset;
|
||||||
|
|
||||||
public int format;
|
int format;
|
||||||
|
/** packed ints version used to encode these numerics */
|
||||||
public int packedIntsVersion;
|
public int packedIntsVersion;
|
||||||
|
/** count of values written */
|
||||||
public long count;
|
public long count;
|
||||||
|
/** packed ints blocksize */
|
||||||
public int blockSize;
|
public int blockSize;
|
||||||
|
|
||||||
long minValue;
|
long minValue;
|
||||||
|
@ -571,17 +587,26 @@ public class Lucene45DocValuesProducer extends DocValuesProducer {
|
||||||
long table[];
|
long table[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** metadata entry for a binary docvalues field */
|
||||||
protected static class BinaryEntry {
|
protected static class BinaryEntry {
|
||||||
|
private BinaryEntry() {}
|
||||||
|
/** offset to the bitset representing docsWithField, or -1 if no documents have missing values */
|
||||||
long missingOffset;
|
long missingOffset;
|
||||||
|
/** offset to the actual binary values */
|
||||||
long offset;
|
long offset;
|
||||||
|
|
||||||
int format;
|
int format;
|
||||||
|
/** count of values written */
|
||||||
public long count;
|
public long count;
|
||||||
int minLength;
|
int minLength;
|
||||||
int maxLength;
|
int maxLength;
|
||||||
|
/** offset to the addressing data that maps a value to its slice of the byte[] */
|
||||||
public long addressesOffset;
|
public long addressesOffset;
|
||||||
|
/** interval of shared prefix chunks (when using prefix-compressed binary) */
|
||||||
public long addressInterval;
|
public long addressInterval;
|
||||||
|
/** packed ints version used to encode addressing information */
|
||||||
public int packedIntsVersion;
|
public int packedIntsVersion;
|
||||||
|
/** packed ints blocksize */
|
||||||
public int blockSize;
|
public int blockSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -96,8 +96,8 @@ class BinaryDocValuesWriter extends DocValuesWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
private long docsWithFieldBytesUsed() {
|
private long docsWithFieldBytesUsed() {
|
||||||
// nocommit: this is not correct
|
// size of the long[] + some overhead
|
||||||
return docsWithField.getBits().length*RamUsageEstimator.NUM_BYTES_LONG;
|
return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void updateBytesUsed() {
|
private void updateBytesUsed() {
|
||||||
|
|
|
@ -70,8 +70,8 @@ class NumericDocValuesWriter extends DocValuesWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
private long docsWithFieldBytesUsed() {
|
private long docsWithFieldBytesUsed() {
|
||||||
// nocommit: this is not correct
|
// size of the long[] + some overhead
|
||||||
return docsWithField.getBits().length*RamUsageEstimator.NUM_BYTES_LONG;
|
return RamUsageEstimator.sizeOf(docsWithField.getBits()) + 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void updateBytesUsed() {
|
private void updateBytesUsed() {
|
||||||
|
|
|
@ -28,7 +28,6 @@ import org.apache.lucene.codecs.lucene42.Lucene42NormsFormat;
|
||||||
import org.apache.lucene.index.SegmentReadState;
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
|
||||||
// nocommit
|
|
||||||
/**
|
/**
|
||||||
* Just like {@link Lucene42NormsFormat} but with additional asserts.
|
* Just like {@link Lucene42NormsFormat} but with additional asserts.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Codecs for testing (simulate old disk formats, wacky theoretical use cases, etc)
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -2538,7 +2538,7 @@ public abstract class BaseDocValuesFormatTestCase extends LuceneTestCase {
|
||||||
d.close();
|
d.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// nocommit: get this out of here and into the deprecated codecs (4.0, 4.2)
|
// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
|
||||||
public void testHugeBinaryValueLimit() throws Exception {
|
public void testHugeBinaryValueLimit() throws Exception {
|
||||||
// We only test DVFormats that have a limit
|
// We only test DVFormats that have a limit
|
||||||
assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
|
assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
|
||||||
|
|
|
@ -757,14 +757,13 @@ public class _TestUtil {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// nocommit: remove this, push this test to Lucene40/Lucene42 codec tests
|
// TODO: remove this, push this test to Lucene40/Lucene42 codec tests
|
||||||
public static boolean fieldSupportsHugeBinaryDocValues(String field) {
|
public static boolean fieldSupportsHugeBinaryDocValues(String field) {
|
||||||
String dvFormat = getDocValuesFormat(field);
|
String dvFormat = getDocValuesFormat(field);
|
||||||
System.out.println(dvFormat);
|
if (dvFormat.equals("Lucene40") || dvFormat.equals("Lucene42")) {
|
||||||
return dvFormat.equals("Lucene45") ||
|
return false;
|
||||||
dvFormat.equals("Asserting") ||
|
}
|
||||||
dvFormat.equals("Disk") ||
|
return true;
|
||||||
dvFormat.equals("SimpleText");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
|
public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
|
||||||
|
|
Loading…
Reference in New Issue