mirror of https://github.com/apache/lucene.git
LUCENE-5301: Add a common interface to PackedInts data-structures, which extends NumericDocValues.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1541063 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
12b06a0b72
commit
ea15eae958
|
@ -233,12 +233,7 @@ class MemoryDocValuesProducer extends DocValuesProducer {
|
|||
case DELTA_COMPRESSED:
|
||||
final int blockSize = data.readVInt();
|
||||
final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
|
||||
return new NumericDocValues() {
|
||||
@Override
|
||||
public long get(int docID) {
|
||||
return reader.get(docID);
|
||||
}
|
||||
};
|
||||
return reader;
|
||||
case UNCOMPRESSED:
|
||||
final byte bytes[] = new byte[maxDoc];
|
||||
data.readBytes(bytes, 0, bytes.length);
|
||||
|
|
|
@ -52,6 +52,7 @@ import org.apache.lucene.store.IndexInput;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.packed.BlockPackedReader;
|
||||
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
@ -295,24 +296,19 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
|
|||
return sizeInBytes;
|
||||
}
|
||||
|
||||
LongNumericDocValues getNumeric(NumericEntry entry) throws IOException {
|
||||
LongValues getNumeric(NumericEntry entry) throws IOException {
|
||||
final IndexInput data = this.data.clone();
|
||||
data.seek(entry.offset);
|
||||
|
||||
switch (entry.format) {
|
||||
case DELTA_COMPRESSED:
|
||||
final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
|
||||
return new LongNumericDocValues() {
|
||||
@Override
|
||||
public long get(long id) {
|
||||
return reader.get(id);
|
||||
}
|
||||
};
|
||||
return reader;
|
||||
case GCD_COMPRESSED:
|
||||
final long min = entry.minValue;
|
||||
final long mult = entry.gcd;
|
||||
final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
|
||||
return new LongNumericDocValues() {
|
||||
return new LongValues() {
|
||||
@Override
|
||||
public long get(long id) {
|
||||
return min + mult * quotientReader.get(id);
|
||||
|
@ -322,7 +318,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
|
|||
final long table[] = entry.table;
|
||||
final int bitsRequired = PackedInts.bitsRequired(table.length - 1);
|
||||
final PackedInts.Reader ords = PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, bitsRequired);
|
||||
return new LongNumericDocValues() {
|
||||
return new LongValues() {
|
||||
@Override
|
||||
public long get(long id) {
|
||||
return table[(int) ords.get((int) id)];
|
||||
|
@ -522,7 +518,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
|
|||
final long valueCount = binaries.get(field.number).count;
|
||||
// we keep the byte[]s and list of ords on disk, these could be large
|
||||
final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field);
|
||||
final LongNumericDocValues ordinals = getNumeric(ords.get(field.number));
|
||||
final LongValues ordinals = getNumeric(ords.get(field.number));
|
||||
// but the addresses to the ord stream are in RAM
|
||||
final MonotonicBlockPackedReader ordIndex = getOrdIndexInstance(data, field, ordIndexes.get(field.number));
|
||||
|
||||
|
@ -676,15 +672,6 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
|
|||
}
|
||||
|
||||
// internally we compose complex dv (sorted/sortedset) from other ones
|
||||
static abstract class LongNumericDocValues extends NumericDocValues {
|
||||
@Override
|
||||
public final long get(int docID) {
|
||||
return get((long) docID);
|
||||
}
|
||||
|
||||
abstract long get(long id);
|
||||
}
|
||||
|
||||
static abstract class LongBinaryDocValues extends BinaryDocValues {
|
||||
@Override
|
||||
public final void get(int docID, BytesRef result) {
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/** Abstraction over an array of longs.
|
||||
* This class extends NumericDocValues so that we don't need to add another
|
||||
* level of abstraction every time we want eg. to use the {@link PackedInts}
|
||||
* utility classes to represent a {@link NumericDocValues} instance.
|
||||
* @lucene.internal */
|
||||
public abstract class LongValues extends NumericDocValues {
|
||||
|
||||
/** Get value at <code>index</code>. */
|
||||
public abstract long get(long index);
|
||||
|
||||
@Override
|
||||
public long get(int idx) {
|
||||
return get((long) idx);
|
||||
}
|
||||
|
||||
}
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.util.packed;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
@ -25,7 +26,7 @@ import java.util.Arrays;
|
|||
import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
|
||||
|
||||
/** Common functionality shared by {@link AppendingDeltaPackedLongBuffer} and {@link MonotonicAppendingLongBuffer}. */
|
||||
abstract class AbstractAppendingLongBuffer {
|
||||
abstract class AbstractAppendingLongBuffer extends LongValues {
|
||||
|
||||
static final int MIN_PAGE_SIZE = 64;
|
||||
// More than 1M doesn't really makes sense with these appending buffers
|
||||
|
@ -92,7 +93,7 @@ abstract class AbstractAppendingLongBuffer {
|
|||
|
||||
abstract void packPendingValues();
|
||||
|
||||
/** Get a value from this buffer. */
|
||||
@Override
|
||||
public final long get(long index) {
|
||||
assert index >= 0 && index < size();
|
||||
final int block = (int) (index >> pageShift);
|
||||
|
|
|
@ -21,13 +21,14 @@ import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
|
|||
import static org.apache.lucene.util.packed.PackedInts.numBlocks;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Base implementation for {@link PagedMutable} and {@link PagedGrowableWriter}.
|
||||
* @lucene.internal
|
||||
*/
|
||||
abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> {
|
||||
abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends LongValues {
|
||||
|
||||
static final int MIN_BLOCK_SIZE = 1 << 6;
|
||||
static final int MAX_BLOCK_SIZE = 1 << 30;
|
||||
|
@ -80,7 +81,7 @@ abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> {
|
|||
return (int) index & pageMask;
|
||||
}
|
||||
|
||||
/** Get value at <code>index</code>. */
|
||||
@Override
|
||||
public final long get(long index) {
|
||||
assert index >= 0 && index < size;
|
||||
final int pageIndex = pageIndex(index);
|
||||
|
|
|
@ -29,12 +29,13 @@ import static org.apache.lucene.util.packed.PackedInts.numBlocks;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
|
||||
/**
|
||||
* Provides random access to a stream written with {@link BlockPackedWriter}.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class BlockPackedReader {
|
||||
public final class BlockPackedReader extends LongValues {
|
||||
|
||||
private final int blockShift, blockMask;
|
||||
private final long valueCount;
|
||||
|
@ -77,7 +78,7 @@ public final class BlockPackedReader {
|
|||
this.minValues = minValues;
|
||||
}
|
||||
|
||||
/** Get value at <code>index</code>. */
|
||||
@Override
|
||||
public long get(long index) {
|
||||
assert index >= 0 && index < valueCount;
|
||||
final int block = (int) (index >>> blockShift);
|
||||
|
|
|
@ -21,8 +21,6 @@ import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.BPV_SHIFT;
|
|||
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MAX_BLOCK_SIZE;
|
||||
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_BLOCK_SIZE;
|
||||
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_VALUE_EQUALS_0;
|
||||
import static org.apache.lucene.util.packed.BlockPackedReaderIterator.readVLong;
|
||||
import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
|
||||
import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
|
||||
|
||||
import java.io.EOFException;
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
*
|
||||
* <p>@lucene.internal</p>
|
||||
*/
|
||||
public class GrowableWriter implements PackedInts.Mutable {
|
||||
public class GrowableWriter extends PackedInts.Mutable {
|
||||
|
||||
private long currentMask;
|
||||
private PackedInts.Mutable current;
|
||||
|
|
|
@ -26,6 +26,7 @@ import static org.apache.lucene.util.packed.PackedInts.numBlocks;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
|
@ -33,7 +34,7 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
* {@link MonotonicBlockPackedWriter}.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class MonotonicBlockPackedReader {
|
||||
public final class MonotonicBlockPackedReader extends LongValues {
|
||||
|
||||
private final int blockShift, blockMask;
|
||||
private final long valueCount;
|
||||
|
@ -72,7 +73,7 @@ public final class MonotonicBlockPackedReader {
|
|||
}
|
||||
}
|
||||
|
||||
/** Get value at <code>index</code>. */
|
||||
@Override
|
||||
public long get(long index) {
|
||||
assert index >= 0 && index < valueCount;
|
||||
final int block = (int) (index >>> blockShift);
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
@ -451,19 +452,24 @@ public class PackedInts {
|
|||
* A read-only random access array of positive integers.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static interface Reader {
|
||||
/**
|
||||
* @param index the position of the wanted value.
|
||||
* @return the value at the stated index.
|
||||
*/
|
||||
long get(int index);
|
||||
public static abstract class Reader extends NumericDocValues {
|
||||
|
||||
/**
|
||||
* Bulk get: read at least one and at most <code>len</code> longs starting
|
||||
* from <code>index</code> into <code>arr[off:off+len]</code> and return
|
||||
* the actual number of values that have been read.
|
||||
*/
|
||||
int get(int index, long[] arr, int off, int len);
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < size();
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int gets = Math.min(size() - index, len);
|
||||
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
|
||||
arr[o] = get(i);
|
||||
}
|
||||
return gets;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of bits used to store any given value.
|
||||
|
@ -471,17 +477,17 @@ public class PackedInts {
|
|||
* {@code bitsPerValue * #values} as implementations are free to
|
||||
* use non-space-optimal packing of bits.
|
||||
*/
|
||||
int getBitsPerValue();
|
||||
public abstract int getBitsPerValue();
|
||||
|
||||
/**
|
||||
* @return the number of values.
|
||||
*/
|
||||
int size();
|
||||
public abstract int size();
|
||||
|
||||
/**
|
||||
* Return the in-memory size in bytes.
|
||||
*/
|
||||
long ramBytesUsed();
|
||||
public abstract long ramBytesUsed();
|
||||
|
||||
/**
|
||||
* Expert: if the bit-width of this reader matches one of
|
||||
|
@ -492,7 +498,10 @@ public class PackedInts {
|
|||
* interpret the full value as unsigned. Ie,
|
||||
* bytes[idx]&0xFF, shorts[idx]&0xFFFF, etc.
|
||||
*/
|
||||
Object getArray();
|
||||
public Object getArray() {
|
||||
assert !hasArray();
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if this implementation is backed by a
|
||||
|
@ -500,7 +509,9 @@ public class PackedInts {
|
|||
*
|
||||
* @see #getArray
|
||||
*/
|
||||
boolean hasArray();
|
||||
public boolean hasArray() {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -558,14 +569,14 @@ public class PackedInts {
|
|||
* A packed integer array that can be modified.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static interface Mutable extends Reader {
|
||||
public static abstract class Mutable extends Reader {
|
||||
|
||||
/**
|
||||
* Set the value at the given index in the array.
|
||||
* @param index where the value should be positioned.
|
||||
* @param value a value conforming to the constraints set by the array.
|
||||
*/
|
||||
void set(int index, long value);
|
||||
public abstract void set(int index, long value);
|
||||
|
||||
/**
|
||||
* Bulk set: set at least one and at most <code>len</code> longs starting
|
||||
|
@ -573,25 +584,55 @@ public class PackedInts {
|
|||
* <code>index</code>. Returns the actual number of values that have been
|
||||
* set.
|
||||
*/
|
||||
int set(int index, long[] arr, int off, int len);
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < size();
|
||||
len = Math.min(len, size() - index);
|
||||
assert off + len <= arr.length;
|
||||
|
||||
for (int i = index, o = off, end = index + len; i < end; ++i, ++o) {
|
||||
set(i, arr[o]);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill the mutable from <code>fromIndex</code> (inclusive) to
|
||||
* <code>toIndex</code> (exclusive) with <code>val</code>.
|
||||
*/
|
||||
void fill(int fromIndex, int toIndex, long val);
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
assert val <= maxValue(getBitsPerValue());
|
||||
assert fromIndex <= toIndex;
|
||||
for (int i = fromIndex; i < toIndex; ++i) {
|
||||
set(i, val);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets all values to 0.
|
||||
*/
|
||||
void clear();
|
||||
public void clear() {
|
||||
fill(0, size(), 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Save this mutable into <code>out</code>. Instantiating a reader from
|
||||
* the generated data will return a reader with the same number of bits
|
||||
* per value.
|
||||
*/
|
||||
void save(DataOutput out) throws IOException;
|
||||
public void save(DataOutput out) throws IOException {
|
||||
Writer writer = getWriterNoHeader(out, getFormat(), size(), getBitsPerValue(), DEFAULT_BUFFER_SIZE);
|
||||
writer.writeHeader();
|
||||
for (int i = 0; i < size(); ++i) {
|
||||
writer.add(get(i));
|
||||
}
|
||||
writer.finish();
|
||||
}
|
||||
|
||||
/** The underlying format. */
|
||||
Format getFormat() {
|
||||
return Format.PACKED;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -599,7 +640,7 @@ public class PackedInts {
|
|||
* A simple base for Readers that keeps track of valueCount and bitsPerValue.
|
||||
* @lucene.internal
|
||||
*/
|
||||
static abstract class ReaderImpl implements Reader {
|
||||
static abstract class ReaderImpl extends Reader {
|
||||
protected final int bitsPerValue;
|
||||
protected final int valueCount;
|
||||
|
||||
|
@ -610,86 +651,45 @@ public class PackedInts {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int getBitsPerValue() {
|
||||
public abstract long get(int index);
|
||||
|
||||
@Override
|
||||
public final int getBitsPerValue() {
|
||||
return bitsPerValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
public final int size() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getArray() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasArray() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
assert off + len <= arr.length;
|
||||
|
||||
final int gets = Math.min(valueCount - index, len);
|
||||
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
|
||||
arr[o] = get(i);
|
||||
}
|
||||
return gets;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static abstract class MutableImpl extends ReaderImpl implements Mutable {
|
||||
static abstract class MutableImpl extends Mutable {
|
||||
|
||||
protected final int valueCount;
|
||||
protected final int bitsPerValue;
|
||||
|
||||
protected MutableImpl(int valueCount, int bitsPerValue) {
|
||||
super(valueCount, bitsPerValue);
|
||||
this.valueCount = valueCount;
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
|
||||
this.bitsPerValue = bitsPerValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int set(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
len = Math.min(len, valueCount - index);
|
||||
assert off + len <= arr.length;
|
||||
|
||||
for (int i = index, o = off, end = index + len; i < end; ++i, ++o) {
|
||||
set(i, arr[o]);
|
||||
}
|
||||
return len;
|
||||
public final int getBitsPerValue() {
|
||||
return bitsPerValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fill(int fromIndex, int toIndex, long val) {
|
||||
assert val <= maxValue(bitsPerValue);
|
||||
assert fromIndex <= toIndex;
|
||||
for (int i = fromIndex; i < toIndex; ++i) {
|
||||
set(i, val);
|
||||
}
|
||||
public final int size() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
protected Format getFormat() {
|
||||
return Format.PACKED;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void save(DataOutput out) throws IOException {
|
||||
Writer writer = getWriterNoHeader(out, getFormat(),
|
||||
valueCount, bitsPerValue, DEFAULT_BUFFER_SIZE);
|
||||
writer.writeHeader();
|
||||
for (int i = 0; i < valueCount; ++i) {
|
||||
writer.add(get(i));
|
||||
}
|
||||
writer.finish();
|
||||
}
|
||||
}
|
||||
|
||||
/** A {@link Reader} which has all its values equal to 0 (bitsPerValue = 0). */
|
||||
public static final class NullReader implements Reader {
|
||||
public static final class NullReader extends Reader {
|
||||
|
||||
private final int valueCount;
|
||||
|
||||
|
@ -727,16 +727,6 @@ public class PackedInts {
|
|||
return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getArray() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasArray() {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** A write-once Writer.
|
||||
|
|
Loading…
Reference in New Issue