diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesConsumer.java index 100f8f32f81..ab23cdd5e62 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesConsumer.java @@ -42,56 +42,11 @@ import org.apache.lucene.util.packed.DirectWriter; import org.apache.lucene.util.packed.MonotonicBlockPackedWriter; import org.apache.lucene.util.packed.PackedInts; +import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat.*; + /** writer for {@link Lucene50DocValuesFormat} */ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable { - static final int BLOCK_SIZE = 16384; - - // address terms in blocks of 16 terms - static final int INTERVAL_SHIFT = 4; - static final int INTERVAL_COUNT = 1 << INTERVAL_SHIFT; - static final int INTERVAL_MASK = INTERVAL_COUNT - 1; - - // build reverse index from every 1024th term - static final int REVERSE_INTERVAL_SHIFT = 10; - static final int REVERSE_INTERVAL_COUNT = 1 << REVERSE_INTERVAL_SHIFT; - static final int REVERSE_INTERVAL_MASK = REVERSE_INTERVAL_COUNT - 1; - - // for conversion from reverse index to block - static final int BLOCK_INTERVAL_SHIFT = REVERSE_INTERVAL_SHIFT - INTERVAL_SHIFT; - static final int BLOCK_INTERVAL_COUNT = 1 << BLOCK_INTERVAL_SHIFT; - static final int BLOCK_INTERVAL_MASK = BLOCK_INTERVAL_COUNT - 1; - - /** Compressed using packed blocks of ints. */ - public static final int DELTA_COMPRESSED = 0; - /** Compressed by computing the GCD. */ - public static final int GCD_COMPRESSED = 1; - /** Compressed by giving IDs to unique values. */ - public static final int TABLE_COMPRESSED = 2; - /** Compressed with monotonically increasing values */ - public static final int MONOTONIC_COMPRESSED = 3; - /** Compressed with constant value (uses only missing bitset) */ - public static final int CONST_COMPRESSED = 4; - - /** Uncompressed binary, written directly (fixed length). */ - public static final int BINARY_FIXED_UNCOMPRESSED = 0; - /** Uncompressed binary, written directly (variable length). */ - public static final int BINARY_VARIABLE_UNCOMPRESSED = 1; - /** Compressed binary with shared prefixes */ - public static final int BINARY_PREFIX_COMPRESSED = 2; - - /** Standard storage for sorted set values with 1 level of indirection: - * {@code docId -> address -> ord}. */ - public static final int SORTED_WITH_ADDRESSES = 0; - /** Single-valued sorted set values, encoded as sorted values, so no level - * of indirection: {@code docId -> ord}. */ - public static final int SORTED_SINGLE_VALUED = 1; - - /** placeholder for missing offset that means there are no missing values */ - public static final int ALL_LIVE = -1; - /** placeholder for missing offset that means all values are missing */ - public static final int ALL_MISSING = -2; - IndexOutput data, meta; final int maxDoc; @@ -329,9 +284,9 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable { if (minLength != maxLength) { meta.writeLong(data.getFilePointer()); meta.writeVInt(PackedInts.VERSION_CURRENT); - meta.writeVInt(BLOCK_SIZE); + meta.writeVInt(MONOTONIC_BLOCK_SIZE); - final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE); + final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, MONOTONIC_BLOCK_SIZE); long addr = 0; writer.add(addr); for (BytesRef v : values) { @@ -373,7 +328,7 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable { // currently, we have to store the delta from expected for every 1/nth term // we could avoid this, but it's not much and less overall RAM than the previous approach! RAMOutputStream addressBuffer = new RAMOutputStream(); - MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, BLOCK_SIZE); + MonotonicBlockPackedWriter termAddresses = new MonotonicBlockPackedWriter(addressBuffer, MONOTONIC_BLOCK_SIZE); // buffers up 16 terms RAMOutputStream bytesBuffer = new RAMOutputStream(); // buffers up block header @@ -424,7 +379,7 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable { meta.writeLong(startFP); meta.writeLong(indexStartFP); meta.writeVInt(PackedInts.VERSION_CURRENT); - meta.writeVInt(BLOCK_SIZE); + meta.writeVInt(MONOTONIC_BLOCK_SIZE); addReverseTermIndex(field, values, maxLength); } } @@ -467,7 +422,7 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable { BytesRef indexTerm = new BytesRef(); long startFP = data.getFilePointer(); PagedBytes pagedBytes = new PagedBytes(15); - MonotonicBlockPackedWriter addresses = new MonotonicBlockPackedWriter(data, BLOCK_SIZE); + MonotonicBlockPackedWriter addresses = new MonotonicBlockPackedWriter(data, MONOTONIC_BLOCK_SIZE); for (BytesRef b : values) { int termPosition = (int) (count & REVERSE_INTERVAL_MASK); @@ -549,9 +504,9 @@ class Lucene50DocValuesConsumer extends DocValuesConsumer implements Closeable { meta.writeLong(data.getFilePointer()); meta.writeVLong(maxDoc); meta.writeVInt(PackedInts.VERSION_CURRENT); - meta.writeVInt(BLOCK_SIZE); + meta.writeVInt(MONOTONIC_BLOCK_SIZE); - final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE); + final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, MONOTONIC_BLOCK_SIZE); long addr = 0; writer.add(addr); for (Number v : values) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesFormat.java index 14fcdd92967..18d7c69b6ac 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesFormat.java @@ -188,9 +188,59 @@ public final class Lucene50DocValuesFormat extends DocValuesFormat { static final String META_EXTENSION = "dvm"; static final int VERSION_START = 0; static final int VERSION_CURRENT = VERSION_START; + + // indicates docvalues type static final byte NUMERIC = 0; static final byte BINARY = 1; static final byte SORTED = 2; static final byte SORTED_SET = 3; static final byte SORTED_NUMERIC = 4; + + // address terms in blocks of 16 terms + static final int INTERVAL_SHIFT = 4; + static final int INTERVAL_COUNT = 1 << INTERVAL_SHIFT; + static final int INTERVAL_MASK = INTERVAL_COUNT - 1; + + // build reverse index from every 1024th term + static final int REVERSE_INTERVAL_SHIFT = 10; + static final int REVERSE_INTERVAL_COUNT = 1 << REVERSE_INTERVAL_SHIFT; + static final int REVERSE_INTERVAL_MASK = REVERSE_INTERVAL_COUNT - 1; + + // for conversion from reverse index to block + static final int BLOCK_INTERVAL_SHIFT = REVERSE_INTERVAL_SHIFT - INTERVAL_SHIFT; + static final int BLOCK_INTERVAL_COUNT = 1 << BLOCK_INTERVAL_SHIFT; + static final int BLOCK_INTERVAL_MASK = BLOCK_INTERVAL_COUNT - 1; + + /** Compressed using packed blocks of ints. */ + static final int DELTA_COMPRESSED = 0; + /** Compressed by computing the GCD. */ + static final int GCD_COMPRESSED = 1; + /** Compressed by giving IDs to unique values. */ + static final int TABLE_COMPRESSED = 2; + /** Compressed with monotonically increasing values */ + static final int MONOTONIC_COMPRESSED = 3; + /** Compressed with constant value (uses only missing bitset) */ + static final int CONST_COMPRESSED = 4; + + /** Uncompressed binary, written directly (fixed length). */ + static final int BINARY_FIXED_UNCOMPRESSED = 0; + /** Uncompressed binary, written directly (variable length). */ + static final int BINARY_VARIABLE_UNCOMPRESSED = 1; + /** Compressed binary with shared prefixes */ + static final int BINARY_PREFIX_COMPRESSED = 2; + + /** Standard storage for sorted set values with 1 level of indirection: + * {@code docId -> address -> ord}. */ + static final int SORTED_WITH_ADDRESSES = 0; + /** Single-valued sorted set values, encoded as sorted values, so no level + * of indirection: {@code docId -> ord}. */ + static final int SORTED_SINGLE_VALUED = 1; + + /** placeholder for missing offset that means there are no missing values */ + static final int ALL_LIVE = -1; + /** placeholder for missing offset that means all values are missing */ + static final int ALL_MISSING = -2; + + // addressing uses 16k blocks + static final int MONOTONIC_BLOCK_SIZE = 16384; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesProducer.java index 753099d445e..9eb4e79a323 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50DocValuesProducer.java @@ -57,25 +57,7 @@ import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.DirectReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.ALL_LIVE; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.ALL_MISSING; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_PREFIX_COMPRESSED; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BLOCK_INTERVAL_MASK; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BLOCK_INTERVAL_SHIFT; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.CONST_COMPRESSED; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.DELTA_COMPRESSED; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.GCD_COMPRESSED; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_COUNT; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_MASK; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_SHIFT; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.MONOTONIC_COMPRESSED; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.REVERSE_INTERVAL_MASK; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.REVERSE_INTERVAL_SHIFT; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.SORTED_SINGLE_VALUED; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.SORTED_WITH_ADDRESSES; -import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.TABLE_COMPRESSED; +import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesFormat.*; /** reader for {@link Lucene50DocValuesFormat} */ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {