mirror of https://github.com/apache/lucene.git
move all format constants from Lucene50DocValuesConsumer to Lucene50DocValuesFormat
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1687410 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
eb2ba50412
commit
7f650d6b3e
|
@ -42,56 +42,11 @@ import org.apache.lucene.util.packed.DirectWriter;
|
||||||
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
|
import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
|
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat.*;
|
||||||
|
|
||||||
/** writer for {@link Lucene50DocValuesFormat} */
|
/** writer for {@link Lucene50DocValuesFormat} */
|
||||||
class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||||
|
|
||||||
static final int BLOCK_SIZE = 16384;
|
|
||||||
|
|
||||||
// address terms in blocks of 16 terms
|
|
||||||
static final int INTERVAL_SHIFT = 4;
|
|
||||||
static final int INTERVAL_COUNT = 1 << INTERVAL_SHIFT;
|
|
||||||
static final int INTERVAL_MASK = INTERVAL_COUNT - 1;
|
|
||||||
|
|
||||||
// build reverse index from every 1024th term
|
|
||||||
static final int REVERSE_INTERVAL_SHIFT = 10;
|
|
||||||
static final int REVERSE_INTERVAL_COUNT = 1 << REVERSE_INTERVAL_SHIFT;
|
|
||||||
static final int REVERSE_INTERVAL_MASK = REVERSE_INTERVAL_COUNT - 1;
|
|
||||||
|
|
||||||
// for conversion from reverse index to block
|
|
||||||
static final int BLOCK_INTERVAL_SHIFT = REVERSE_INTERVAL_SHIFT - INTERVAL_SHIFT;
|
|
||||||
static final int BLOCK_INTERVAL_COUNT = 1 << BLOCK_INTERVAL_SHIFT;
|
|
||||||
static final int BLOCK_INTERVAL_MASK = BLOCK_INTERVAL_COUNT - 1;
|
|
||||||
|
|
||||||
/** Compressed using packed blocks of ints. */
|
|
||||||
public static final int DELTA_COMPRESSED = 0;
|
|
||||||
/** Compressed by computing the GCD. */
|
|
||||||
public static final int GCD_COMPRESSED = 1;
|
|
||||||
/** Compressed by giving IDs to unique values. */
|
|
||||||
public static final int TABLE_COMPRESSED = 2;
|
|
||||||
/** Compressed with monotonically increasing values */
|
|
||||||
public static final int MONOTONIC_COMPRESSED = 3;
|
|
||||||
/** Compressed with constant value (uses only missing bitset) */
|
|
||||||
public static final int CONST_COMPRESSED = 4;
|
|
||||||
|
|
||||||
/** Uncompressed binary, written directly (fixed length). */
|
|
||||||
public static final int BINARY_FIXED_UNCOMPRESSED = 0;
|
|
||||||
/** Uncompressed binary, written directly (variable length). */
|
|
||||||
public static final int BINARY_VARIABLE_UNCOMPRESSED = 1;
|
|
||||||
/** Compressed binary with shared prefixes */
|
|
||||||
public static final int BINARY_PREFIX_COMPRESSED = 2;
|
|
||||||
|
|
||||||
/** Standard storage for sorted set values with 1 level of indirection:
|
|
||||||
* {@code docId -> address -> ord}. */
|
|
||||||
public static final int SORTED_WITH_ADDRESSES = 0;
|
|
||||||
/** Single-valued sorted set values, encoded as sorted values, so no level
|
|
||||||
* of indirection: {@code docId -> ord}. */
|
|
||||||
public static final int SORTED_SINGLE_VALUED = 1;
|
|
||||||
|
|
||||||
/** placeholder for missing offset that means there are no missing values */
|
|
||||||
public static final int ALL_LIVE = -1;
|
|
||||||
/** placeholder for missing offset that means all values are missing */
|
|
||||||
public static final int ALL_MISSING = -2;
|
|
||||||
|
|
||||||
IndexOutput data, meta;
|
IndexOutput data, meta;
|
||||||
final int maxDoc;
|
final int maxDoc;
|
||||||
|
|
||||||
|
@ -329,9 +284,9 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||||
if (minLength != maxLength) {
|
if (minLength != maxLength) {
|
||||||
meta.writeLong(data.getFilePointer());
|
meta.writeLong(data.getFilePointer());
|
||||||
meta.writeVInt(PackedInts.VERSION_CURRENT);
|
meta.writeVInt(PackedInts.VERSION_CURRENT);
|
||||||
meta.writeVInt(BLOCK_SIZE);
|
meta.writeVInt(MONOTONIC_BLOCK_SIZE);
|
||||||
|
|
||||||
final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
|
final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, MONOTONIC_BLOCK_SIZE);
|
||||||
long addr = 0;
|
long addr = 0;
|
||||||
writer.add(addr);
|
writer.add(addr);
|
||||||
for (BytesRef v : values) {
|
for (BytesRef v : values) {
|
||||||
|
@ -373,7 +328,7 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||||
// currently, we have to store the delta from expected for every 1/nth term
|
// currently, we have to store the delta from expected for every 1/nth term
|
||||||
// we could avoid this, but it's not much and less overall RAM than the previous approach!
|
// we could avoid this, but it's not much and less overall RAM than the previous approach!
|
||||||
RAMOutputStream addressBuffer = new RAMOutputStream();
|
RAMOutputStream addressBuffer = new RAMOutputStream();
|
||||||
MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, BLOCK_SIZE);
|
MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, MONOTONIC_BLOCK_SIZE);
|
||||||
// buffers up 16 terms
|
// buffers up 16 terms
|
||||||
RAMOutputStream bytesBuffer = new RAMOutputStream();
|
RAMOutputStream bytesBuffer = new RAMOutputStream();
|
||||||
// buffers up block header
|
// buffers up block header
|
||||||
|
@ -424,7 +379,7 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||||
meta.writeLong(startFP);
|
meta.writeLong(startFP);
|
||||||
meta.writeLong(indexStartFP);
|
meta.writeLong(indexStartFP);
|
||||||
meta.writeVInt(PackedInts.VERSION_CURRENT);
|
meta.writeVInt(PackedInts.VERSION_CURRENT);
|
||||||
meta.writeVInt(BLOCK_SIZE);
|
meta.writeVInt(MONOTONIC_BLOCK_SIZE);
|
||||||
addReverseTermIndex(field, values, maxLength);
|
addReverseTermIndex(field, values, maxLength);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -467,7 +422,7 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||||
BytesRef indexTerm = new BytesRef();
|
BytesRef indexTerm = new BytesRef();
|
||||||
long startFP = data.getFilePointer();
|
long startFP = data.getFilePointer();
|
||||||
PagedBytes pagedBytes = new PagedBytes(15);
|
PagedBytes pagedBytes = new PagedBytes(15);
|
||||||
MonotonicBlockPackedWriter addresses = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
|
MonotonicBlockPackedWriter addresses = new MonotonicBlockPackedWriter(data, MONOTONIC_BLOCK_SIZE);
|
||||||
|
|
||||||
for (BytesRef b : values) {
|
for (BytesRef b : values) {
|
||||||
int termPosition = (int) (count & REVERSE_INTERVAL_MASK);
|
int termPosition = (int) (count & REVERSE_INTERVAL_MASK);
|
||||||
|
@ -549,9 +504,9 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable {
|
||||||
meta.writeLong(data.getFilePointer());
|
meta.writeLong(data.getFilePointer());
|
||||||
meta.writeVLong(maxDoc);
|
meta.writeVLong(maxDoc);
|
||||||
meta.writeVInt(PackedInts.VERSION_CURRENT);
|
meta.writeVInt(PackedInts.VERSION_CURRENT);
|
||||||
meta.writeVInt(BLOCK_SIZE);
|
meta.writeVInt(MONOTONIC_BLOCK_SIZE);
|
||||||
|
|
||||||
final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
|
final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, MONOTONIC_BLOCK_SIZE);
|
||||||
long addr = 0;
|
long addr = 0;
|
||||||
writer.add(addr);
|
writer.add(addr);
|
||||||
for (Number v : values) {
|
for (Number v : values) {
|
||||||
|
|
|
@ -188,9 +188,59 @@ public final class Lucene50DocValuesFormat extends DocValuesFormat {
|
||||||
static final String META_EXTENSION = "dvm";
|
static final String META_EXTENSION = "dvm";
|
||||||
static final int VERSION_START = 0;
|
static final int VERSION_START = 0;
|
||||||
static final int VERSION_CURRENT = VERSION_START;
|
static final int VERSION_CURRENT = VERSION_START;
|
||||||
|
|
||||||
|
// indicates docvalues type
|
||||||
static final byte NUMERIC = 0;
|
static final byte NUMERIC = 0;
|
||||||
static final byte BINARY = 1;
|
static final byte BINARY = 1;
|
||||||
static final byte SORTED = 2;
|
static final byte SORTED = 2;
|
||||||
static final byte SORTED_SET = 3;
|
static final byte SORTED_SET = 3;
|
||||||
static final byte SORTED_NUMERIC = 4;
|
static final byte SORTED_NUMERIC = 4;
|
||||||
|
|
||||||
|
// address terms in blocks of 16 terms
|
||||||
|
static final int INTERVAL_SHIFT = 4;
|
||||||
|
static final int INTERVAL_COUNT = 1 << INTERVAL_SHIFT;
|
||||||
|
static final int INTERVAL_MASK = INTERVAL_COUNT - 1;
|
||||||
|
|
||||||
|
// build reverse index from every 1024th term
|
||||||
|
static final int REVERSE_INTERVAL_SHIFT = 10;
|
||||||
|
static final int REVERSE_INTERVAL_COUNT = 1 << REVERSE_INTERVAL_SHIFT;
|
||||||
|
static final int REVERSE_INTERVAL_MASK = REVERSE_INTERVAL_COUNT - 1;
|
||||||
|
|
||||||
|
// for conversion from reverse index to block
|
||||||
|
static final int BLOCK_INTERVAL_SHIFT = REVERSE_INTERVAL_SHIFT - INTERVAL_SHIFT;
|
||||||
|
static final int BLOCK_INTERVAL_COUNT = 1 << BLOCK_INTERVAL_SHIFT;
|
||||||
|
static final int BLOCK_INTERVAL_MASK = BLOCK_INTERVAL_COUNT - 1;
|
||||||
|
|
||||||
|
/** Compressed using packed blocks of ints. */
|
||||||
|
static final int DELTA_COMPRESSED = 0;
|
||||||
|
/** Compressed by computing the GCD. */
|
||||||
|
static final int GCD_COMPRESSED = 1;
|
||||||
|
/** Compressed by giving IDs to unique values. */
|
||||||
|
static final int TABLE_COMPRESSED = 2;
|
||||||
|
/** Compressed with monotonically increasing values */
|
||||||
|
static final int MONOTONIC_COMPRESSED = 3;
|
||||||
|
/** Compressed with constant value (uses only missing bitset) */
|
||||||
|
static final int CONST_COMPRESSED = 4;
|
||||||
|
|
||||||
|
/** Uncompressed binary, written directly (fixed length). */
|
||||||
|
static final int BINARY_FIXED_UNCOMPRESSED = 0;
|
||||||
|
/** Uncompressed binary, written directly (variable length). */
|
||||||
|
static final int BINARY_VARIABLE_UNCOMPRESSED = 1;
|
||||||
|
/** Compressed binary with shared prefixes */
|
||||||
|
static final int BINARY_PREFIX_COMPRESSED = 2;
|
||||||
|
|
||||||
|
/** Standard storage for sorted set values with 1 level of indirection:
|
||||||
|
* {@code docId -> address -> ord}. */
|
||||||
|
static final int SORTED_WITH_ADDRESSES = 0;
|
||||||
|
/** Single-valued sorted set values, encoded as sorted values, so no level
|
||||||
|
* of indirection: {@code docId -> ord}. */
|
||||||
|
static final int SORTED_SINGLE_VALUED = 1;
|
||||||
|
|
||||||
|
/** placeholder for missing offset that means there are no missing values */
|
||||||
|
static final int ALL_LIVE = -1;
|
||||||
|
/** placeholder for missing offset that means all values are missing */
|
||||||
|
static final int ALL_MISSING = -2;
|
||||||
|
|
||||||
|
// addressing uses 16k blocks
|
||||||
|
static final int MONOTONIC_BLOCK_SIZE = 16384;
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,25 +57,7 @@ import org.apache.lucene.util.RamUsageEstimator;
|
||||||
import org.apache.lucene.util.packed.DirectReader;
|
import org.apache.lucene.util.packed.DirectReader;
|
||||||
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
|
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
|
||||||
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.ALL_LIVE;
|
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat.*;
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.ALL_MISSING;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_PREFIX_COMPRESSED;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BLOCK_INTERVAL_MASK;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BLOCK_INTERVAL_SHIFT;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.CONST_COMPRESSED;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.DELTA_COMPRESSED;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.GCD_COMPRESSED;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_COUNT;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_MASK;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_SHIFT;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.MONOTONIC_COMPRESSED;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.REVERSE_INTERVAL_MASK;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.REVERSE_INTERVAL_SHIFT;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.SORTED_SINGLE_VALUED;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.SORTED_WITH_ADDRESSES;
|
|
||||||
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.TABLE_COMPRESSED;
|
|
||||||
|
|
||||||
/** reader for {@link Lucene50DocValuesFormat} */
|
/** reader for {@link Lucene50DocValuesFormat} */
|
||||||
class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
|
|
Loading…
Reference in New Issue