LUCENE-5301: Add a common interface to PackedInts data-structures, which extends NumericDocValues.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1541063 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2013-11-12 13:50:57 +00:00
parent 12b06a0b72
commit ea15eae958
10 changed files with 136 additions and 124 deletions

View File

@ -233,12 +233,7 @@ class MemoryDocValuesProducer extends DocValuesProducer {
case DELTA_COMPRESSED:
final int blockSize = data.readVInt();
final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
return new NumericDocValues() {
@Override
public long get(int docID) {
return reader.get(docID);
}
};
return reader;
case UNCOMPRESSED:
final byte bytes[] = new byte[maxDoc];
data.readBytes(bytes, 0, bytes.length);

View File

@ -52,6 +52,7 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;
@ -295,24 +296,19 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
return sizeInBytes;
}
LongNumericDocValues getNumeric(NumericEntry entry) throws IOException {
LongValues getNumeric(NumericEntry entry) throws IOException {
final IndexInput data = this.data.clone();
data.seek(entry.offset);
switch (entry.format) {
case DELTA_COMPRESSED:
final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
return new LongNumericDocValues() {
@Override
public long get(long id) {
return reader.get(id);
}
};
return reader;
case GCD_COMPRESSED:
final long min = entry.minValue;
final long mult = entry.gcd;
final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
return new LongNumericDocValues() {
return new LongValues() {
@Override
public long get(long id) {
return min + mult * quotientReader.get(id);
@ -322,7 +318,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
final long table[] = entry.table;
final int bitsRequired = PackedInts.bitsRequired(table.length - 1);
final PackedInts.Reader ords = PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, bitsRequired);
return new LongNumericDocValues() {
return new LongValues() {
@Override
public long get(long id) {
return table[(int) ords.get((int) id)];
@ -522,7 +518,7 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
final long valueCount = binaries.get(field.number).count;
// we keep the byte[]s and list of ords on disk, these could be large
final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field);
final LongNumericDocValues ordinals = getNumeric(ords.get(field.number));
final LongValues ordinals = getNumeric(ords.get(field.number));
// but the addresses to the ord stream are in RAM
final MonotonicBlockPackedReader ordIndex = getOrdIndexInstance(data, field, ordIndexes.get(field.number));
@ -676,15 +672,6 @@ public class Lucene45DocValuesProducer extends DocValuesProducer implements Clos
}
// internally we compose complex dv (sorted/sortedset) from other ones
static abstract class LongNumericDocValues extends NumericDocValues {
@Override
public final long get(int docID) {
return get((long) docID);
}
abstract long get(long id);
}
static abstract class LongBinaryDocValues extends BinaryDocValues {
@Override
public final void get(int docID, BytesRef result) {

View File

@ -0,0 +1,38 @@
package org.apache.lucene.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.packed.PackedInts;
/** Abstraction over an array of longs.
* This class extends NumericDocValues so that we don't need to add another
* level of abstraction every time we want eg. to use the {@link PackedInts}
* utility classes to represent a {@link NumericDocValues} instance.
* @lucene.internal */
public abstract class LongValues extends NumericDocValues {
/** Get value at <code>index</code>. */
public abstract long get(long index);
@Override
public long get(int idx) {
return get((long) idx);
}
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.util.packed;
*/
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
import java.util.Arrays;
@ -25,7 +26,7 @@ import java.util.Arrays;
import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
/** Common functionality shared by {@link AppendingDeltaPackedLongBuffer} and {@link MonotonicAppendingLongBuffer}. */
abstract class AbstractAppendingLongBuffer {
abstract class AbstractAppendingLongBuffer extends LongValues {
static final int MIN_PAGE_SIZE = 64;
// More than 1M doesn't really makes sense with these appending buffers
@ -92,7 +93,7 @@ abstract class AbstractAppendingLongBuffer {
abstract void packPendingValues();
/** Get a value from this buffer. */
@Override
public final long get(long index) {
assert index >= 0 && index < size();
final int block = (int) (index >> pageShift);

View File

@ -21,13 +21,14 @@ import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
import static org.apache.lucene.util.packed.PackedInts.numBlocks;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Base implementation for {@link PagedMutable} and {@link PagedGrowableWriter}.
* @lucene.internal
*/
abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> {
abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends LongValues {
static final int MIN_BLOCK_SIZE = 1 << 6;
static final int MAX_BLOCK_SIZE = 1 << 30;
@ -80,7 +81,7 @@ abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> {
return (int) index & pageMask;
}
/** Get value at <code>index</code>. */
@Override
public final long get(long index) {
assert index >= 0 && index < size;
final int pageIndex = pageIndex(index);

View File

@ -29,12 +29,13 @@ import static org.apache.lucene.util.packed.PackedInts.numBlocks;
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.LongValues;
/**
* Provides random access to a stream written with {@link BlockPackedWriter}.
* @lucene.internal
*/
public final class BlockPackedReader {
public final class BlockPackedReader extends LongValues {
private final int blockShift, blockMask;
private final long valueCount;
@ -77,7 +78,7 @@ public final class BlockPackedReader {
this.minValues = minValues;
}
/** Get value at <code>index</code>. */
@Override
public long get(long index) {
assert index >= 0 && index < valueCount;
final int block = (int) (index >>> blockShift);

View File

@ -21,8 +21,6 @@ import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.BPV_SHIFT;
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MAX_BLOCK_SIZE;
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_BLOCK_SIZE;
import static org.apache.lucene.util.packed.AbstractBlockPackedWriter.MIN_VALUE_EQUALS_0;
import static org.apache.lucene.util.packed.BlockPackedReaderIterator.readVLong;
import static org.apache.lucene.util.packed.BlockPackedReaderIterator.zigZagDecode;
import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
import java.io.EOFException;

View File

@ -30,7 +30,7 @@ import org.apache.lucene.util.RamUsageEstimator;
*
* <p>@lucene.internal</p>
*/
public class GrowableWriter implements PackedInts.Mutable {
public class GrowableWriter extends PackedInts.Mutable {
private long currentMask;
private PackedInts.Mutable current;

View File

@ -26,6 +26,7 @@ import static org.apache.lucene.util.packed.PackedInts.numBlocks;
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
/**
@ -33,7 +34,7 @@ import org.apache.lucene.util.RamUsageEstimator;
* {@link MonotonicBlockPackedWriter}.
* @lucene.internal
*/
public final class MonotonicBlockPackedReader {
public final class MonotonicBlockPackedReader extends LongValues {
private final int blockShift, blockMask;
private final long valueCount;
@ -72,14 +73,14 @@ public final class MonotonicBlockPackedReader {
}
}
/** Get value at <code>index</code>. */
@Override
public long get(long index) {
assert index >= 0 && index < valueCount;
final int block = (int) (index >>> blockShift);
final int idx = (int) (index & blockMask);
return minValues[block] + (long) (idx * averages[block]) + zigZagDecode(subReaders[block].get(idx));
}
/** Returns the number of values */
public long size() {
return valueCount;

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
@ -451,19 +452,24 @@ public class PackedInts {
* A read-only random access array of positive integers.
* @lucene.internal
*/
public static interface Reader {
/**
* @param index the position of the wanted value.
* @return the value at the stated index.
*/
long get(int index);
public static abstract class Reader extends NumericDocValues {
/**
* Bulk get: read at least one and at most <code>len</code> longs starting
* from <code>index</code> into <code>arr[off:off+len]</code> and return
* the actual number of values that have been read.
*/
int get(int index, long[] arr, int off, int len);
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < size();
assert off + len <= arr.length;
final int gets = Math.min(size() - index, len);
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
arr[o] = get(i);
}
return gets;
}
/**
* @return the number of bits used to store any given value.
@ -471,17 +477,17 @@ public class PackedInts {
* {@code bitsPerValue * #values} as implementations are free to
* use non-space-optimal packing of bits.
*/
int getBitsPerValue();
public abstract int getBitsPerValue();
/**
* @return the number of values.
*/
int size();
public abstract int size();
/**
* Return the in-memory size in bytes.
*/
long ramBytesUsed();
public abstract long ramBytesUsed();
/**
* Expert: if the bit-width of this reader matches one of
@ -492,7 +498,10 @@ public class PackedInts {
* interpret the full value as unsigned. Ie,
* bytes[idx]&0xFF, shorts[idx]&0xFFFF, etc.
*/
Object getArray();
public Object getArray() {
assert !hasArray();
return null;
}
/**
* Returns true if this implementation is backed by a
@ -500,7 +509,9 @@ public class PackedInts {
*
* @see #getArray
*/
boolean hasArray();
public boolean hasArray() {
return false;
}
}
@ -558,14 +569,14 @@ public class PackedInts {
* A packed integer array that can be modified.
* @lucene.internal
*/
public static interface Mutable extends Reader {
public static abstract class Mutable extends Reader {
/**
* Set the value at the given index in the array.
* @param index where the value should be positioned.
* @param value a value conforming to the constraints set by the array.
*/
void set(int index, long value);
public abstract void set(int index, long value);
/**
* Bulk set: set at least one and at most <code>len</code> longs starting
@ -573,25 +584,55 @@ public class PackedInts {
* <code>index</code>. Returns the actual number of values that have been
* set.
*/
int set(int index, long[] arr, int off, int len);
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < size();
len = Math.min(len, size() - index);
assert off + len <= arr.length;
for (int i = index, o = off, end = index + len; i < end; ++i, ++o) {
set(i, arr[o]);
}
return len;
}
/**
* Fill the mutable from <code>fromIndex</code> (inclusive) to
* <code>toIndex</code> (exclusive) with <code>val</code>.
*/
void fill(int fromIndex, int toIndex, long val);
public void fill(int fromIndex, int toIndex, long val) {
assert val <= maxValue(getBitsPerValue());
assert fromIndex <= toIndex;
for (int i = fromIndex; i < toIndex; ++i) {
set(i, val);
}
}
/**
* Sets all values to 0.
*/
void clear();
public void clear() {
fill(0, size(), 0);
}
/**
* Save this mutable into <code>out</code>. Instantiating a reader from
* the generated data will return a reader with the same number of bits
* per value.
*/
void save(DataOutput out) throws IOException;
public void save(DataOutput out) throws IOException {
Writer writer = getWriterNoHeader(out, getFormat(), size(), getBitsPerValue(), DEFAULT_BUFFER_SIZE);
writer.writeHeader();
for (int i = 0; i < size(); ++i) {
writer.add(get(i));
}
writer.finish();
}
/** The underlying format. */
Format getFormat() {
return Format.PACKED;
}
}
@ -599,7 +640,7 @@ public class PackedInts {
* A simple base for Readers that keeps track of valueCount and bitsPerValue.
* @lucene.internal
*/
static abstract class ReaderImpl implements Reader {
static abstract class ReaderImpl extends Reader {
protected final int bitsPerValue;
protected final int valueCount;
@ -610,86 +651,45 @@ public class PackedInts {
}
@Override
public int getBitsPerValue() {
public abstract long get(int index);
@Override
public final int getBitsPerValue() {
return bitsPerValue;
}
@Override
public int size() {
public final int size() {
return valueCount;
}
@Override
public Object getArray() {
return null;
}
@Override
public boolean hasArray() {
return false;
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
assert off + len <= arr.length;
final int gets = Math.min(valueCount - index, len);
for (int i = index, o = off, end = index + gets; i < end; ++i, ++o) {
arr[o] = get(i);
}
return gets;
}
}
static abstract class MutableImpl extends ReaderImpl implements Mutable {
static abstract class MutableImpl extends Mutable {
protected final int valueCount;
protected final int bitsPerValue;
protected MutableImpl(int valueCount, int bitsPerValue) {
super(valueCount, bitsPerValue);
this.valueCount = valueCount;
assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
this.bitsPerValue = bitsPerValue;
}
@Override
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
len = Math.min(len, valueCount - index);
assert off + len <= arr.length;
for (int i = index, o = off, end = index + len; i < end; ++i, ++o) {
set(i, arr[o]);
}
return len;
public final int getBitsPerValue() {
return bitsPerValue;
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
assert val <= maxValue(bitsPerValue);
assert fromIndex <= toIndex;
for (int i = fromIndex; i < toIndex; ++i) {
set(i, val);
}
public final int size() {
return valueCount;
}
protected Format getFormat() {
return Format.PACKED;
}
@Override
public void save(DataOutput out) throws IOException {
Writer writer = getWriterNoHeader(out, getFormat(),
valueCount, bitsPerValue, DEFAULT_BUFFER_SIZE);
writer.writeHeader();
for (int i = 0; i < valueCount; ++i) {
writer.add(get(i));
}
writer.finish();
}
}
/** A {@link Reader} which has all its values equal to 0 (bitsPerValue = 0). */
public static final class NullReader implements Reader {
public static final class NullReader extends Reader {
private final int valueCount;
@ -727,16 +727,6 @@ public class PackedInts {
return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
}
@Override
public Object getArray() {
return null;
}
@Override
public boolean hasArray() {
return false;
}
}
/** A write-once Writer.