mirror of https://github.com/apache/lucene.git
LUCENE-3467: Cut over numeric docvalues to fixed straight bytes
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1176906 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
42b419aa31
commit
534d8abed0
|
@ -154,7 +154,7 @@ public class DefaultDocValuesProducer extends PerDocValues {
|
|||
case FIXED_INTS_64:
|
||||
case FIXED_INTS_8:
|
||||
case VAR_INTS:
|
||||
return Ints.getValues(dir, id, docCount, context);
|
||||
return Ints.getValues(dir, id, docCount, type, context);
|
||||
case FLOAT_32:
|
||||
return Floats.getValues(dir, id, docCount, context);
|
||||
case FLOAT_64:
|
||||
|
|
|
@ -27,16 +27,25 @@ import org.apache.lucene.index.IndexFileNames;
|
|||
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ByteBlockPool.Allocator;
|
||||
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
|
||||
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
* Provides concrete Writer/Reader implementations for <tt>byte[]</tt> value per
|
||||
|
@ -185,16 +194,18 @@ public final class Bytes {
|
|||
}
|
||||
|
||||
// TODO open up this API?
|
||||
static abstract class BytesBaseSource extends Source {
|
||||
static abstract class BytesSourceBase extends Source {
|
||||
private final PagedBytes pagedBytes;
|
||||
private final ValueType type;
|
||||
protected final IndexInput datIn;
|
||||
protected final IndexInput idxIn;
|
||||
protected final static int PAGED_BYTES_BITS = 15;
|
||||
protected final PagedBytes.Reader data;
|
||||
protected final long totalLengthInBytes;
|
||||
|
||||
|
||||
protected BytesBaseSource(IndexInput datIn, IndexInput idxIn,
|
||||
PagedBytes pagedBytes, long bytesToRead) throws IOException {
|
||||
protected BytesSourceBase(IndexInput datIn, IndexInput idxIn,
|
||||
PagedBytes pagedBytes, long bytesToRead, ValueType type) throws IOException {
|
||||
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
|
||||
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
|
||||
this.datIn = datIn;
|
||||
|
@ -203,6 +214,7 @@ public final class Bytes {
|
|||
this.pagedBytes.copy(datIn, bytesToRead);
|
||||
data = pagedBytes.freeze(true);
|
||||
this.idxIn = idxIn;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
|
@ -220,6 +232,17 @@ public final class Bytes {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns one greater than the largest possible document number.
|
||||
|
@ -245,18 +268,46 @@ public final class Bytes {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
static abstract class DerefBytesSourceBase extends BytesSourceBase {
|
||||
protected final PackedInts.Reader addresses;
|
||||
public DerefBytesSourceBase(IndexInput datIn, IndexInput idxIn, long bytesToRead, ValueType type) throws IOException {
|
||||
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
|
||||
addresses = PackedInts.getReader(idxIn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return addresses.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int maxDoc() {
|
||||
return addresses.size();
|
||||
}
|
||||
|
||||
static abstract class BytesBaseSortedSource extends SortedSource {
|
||||
}
|
||||
|
||||
static abstract class BytesSortedSourceBase extends SortedSource {
|
||||
private final PagedBytes pagedBytes;
|
||||
private final Comparator<BytesRef> comp;
|
||||
protected final PackedInts.Reader docToOrdIndex;
|
||||
private final ValueType type;
|
||||
|
||||
protected final IndexInput datIn;
|
||||
protected final IndexInput idxIn;
|
||||
protected final BytesRef defaultValue = new BytesRef();
|
||||
protected final static int PAGED_BYTES_BITS = 15;
|
||||
private final PagedBytes pagedBytes;
|
||||
protected final PagedBytes.Reader data;
|
||||
private final Comparator<BytesRef> comp;
|
||||
|
||||
|
||||
protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead)
|
||||
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comp, long bytesToRead, ValueType type) throws IOException {
|
||||
this(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
|
||||
}
|
||||
|
||||
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead,ValueType type)
|
||||
throws IOException {
|
||||
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
|
||||
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
|
||||
|
@ -267,8 +318,15 @@ public final class Bytes {
|
|||
this.idxIn = idxIn;
|
||||
this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
|
||||
: comp;
|
||||
docToOrdIndex = PackedInts.getReader(idxIn);
|
||||
this.type = type;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int ord(int docID) {
|
||||
return (int) docToOrdIndex.get(docID) -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
|
||||
|
@ -277,22 +335,15 @@ public final class Bytes {
|
|||
}
|
||||
|
||||
protected void closeIndexInput() throws IOException {
|
||||
try {
|
||||
if (datIn != null) {
|
||||
datIn.close();
|
||||
}
|
||||
} finally {
|
||||
if (idxIn != null) {// if straight
|
||||
idxIn.close();
|
||||
}
|
||||
}
|
||||
IOUtils.close(datIn, idxIn);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the largest doc id + 1 in this doc values source
|
||||
*/
|
||||
protected abstract int maxDoc();
|
||||
|
||||
public int maxDoc() {
|
||||
return docToOrdIndex.size();
|
||||
}
|
||||
/**
|
||||
* Copies the value for the given ord to the given {@link BytesRef} and
|
||||
* returns it.
|
||||
|
@ -336,6 +387,11 @@ public final class Bytes {
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: open up this API?!
|
||||
|
@ -359,7 +415,7 @@ public final class Bytes {
|
|||
this.context = context;
|
||||
}
|
||||
|
||||
protected IndexOutput getDataOut() throws IOException {
|
||||
protected IndexOutput getOrCreateDataOut() throws IOException {
|
||||
if (datOut == null) {
|
||||
boolean success = false;
|
||||
try {
|
||||
|
@ -375,8 +431,16 @@ public final class Bytes {
|
|||
}
|
||||
return datOut;
|
||||
}
|
||||
|
||||
protected IndexOutput getIndexOut() {
|
||||
return idxOut;
|
||||
}
|
||||
|
||||
protected IndexOutput getDataOut() {
|
||||
return datOut;
|
||||
}
|
||||
|
||||
protected IndexOutput getIndexOut() throws IOException {
|
||||
protected IndexOutput getOrCreateIndexOut() throws IOException {
|
||||
boolean success = false;
|
||||
try {
|
||||
if (idxOut == null) {
|
||||
|
@ -503,5 +567,223 @@ public final class Bytes {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
static abstract class DerefBytesWriterBase extends BytesWriterBase {
|
||||
protected int size = -1;
|
||||
protected int[] docToEntry;
|
||||
protected final BytesRefHash hash;
|
||||
|
||||
protected DerefBytesWriterBase(Directory dir, String id, String codecName,
|
||||
int codecVersion, Counter bytesUsed, IOContext context)
|
||||
throws IOException {
|
||||
this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
|
||||
ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context);
|
||||
}
|
||||
|
||||
protected DerefBytesWriterBase(Directory dir, String id, String codecName, int codecVersion, Allocator allocator,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, codecName, codecVersion, bytesUsed, context);
|
||||
hash = new BytesRefHash(new ByteBlockPool(allocator),
|
||||
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
|
||||
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
|
||||
docToEntry = new int[1];
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
protected static int writePrefixLength(DataOutput datOut, BytesRef bytes)
|
||||
throws IOException {
|
||||
if (bytes.length < 128) {
|
||||
datOut.writeByte((byte) bytes.length);
|
||||
return 1;
|
||||
} else {
|
||||
datOut.writeByte((byte) (0x80 | (bytes.length >> 8)));
|
||||
datOut.writeByte((byte) (bytes.length & 0xff));
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, BytesRef bytes) throws IOException {
|
||||
if (bytes.length == 0) { // default value - skip it
|
||||
return;
|
||||
}
|
||||
checkSize(bytes);
|
||||
int ord = hash.add(bytes);
|
||||
if (ord < 0) {
|
||||
ord = (-ord) - 1;
|
||||
}
|
||||
if (docID >= docToEntry.length) {
|
||||
final int size = docToEntry.length;
|
||||
docToEntry = ArrayUtil.grow(docToEntry, 1 + docID);
|
||||
bytesUsed.addAndGet((docToEntry.length - size)
|
||||
* RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
docToEntry[docID] = 1 + ord;
|
||||
}
|
||||
|
||||
protected void checkSize(BytesRef bytes) {
|
||||
if (size == -1) {
|
||||
size = bytes.length;
|
||||
} else if (bytes.length != size) {
|
||||
throw new IllegalArgumentException("expected bytes size=" + size
|
||||
+ " but got " + bytes.length);
|
||||
}
|
||||
}
|
||||
|
||||
// Important that we get docCount, in case there were
|
||||
// some last docs that we didn't see
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
boolean success = false;
|
||||
try {
|
||||
finishInternal(docCount);
|
||||
success = true;
|
||||
} finally {
|
||||
releaseResources();
|
||||
if (success) {
|
||||
IOUtils.close(getIndexOut(), getDataOut());
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(getIndexOut(), getDataOut());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract void finishInternal(int docCount) throws IOException;
|
||||
|
||||
protected void releaseResources() {
|
||||
hash.close();
|
||||
bytesUsed
|
||||
.addAndGet((-docToEntry.length) * RamUsageEstimator.NUM_BYTES_INT);
|
||||
docToEntry = null;
|
||||
}
|
||||
|
||||
protected void writeIndex(IndexOutput idxOut, int docCount,
|
||||
long maxValue, int[] toEntry) throws IOException {
|
||||
writeIndex(idxOut, docCount, maxValue, (int[])null, toEntry);
|
||||
}
|
||||
|
||||
protected void writeIndex(IndexOutput idxOut, int docCount,
|
||||
long maxValue, int[] addresses, int[] toEntry) throws IOException {
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
PackedInts.bitsRequired(maxValue));
|
||||
final int limit = docCount > docToEntry.length ? docToEntry.length
|
||||
: docCount;
|
||||
assert toEntry.length >= limit -1;
|
||||
if (addresses != null) {
|
||||
for (int i = 0; i < limit; i++) {
|
||||
assert addresses[toEntry[i]] >= 0;
|
||||
w.add(addresses[toEntry[i]]);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < limit; i++) {
|
||||
assert toEntry[i] >= 0;
|
||||
w.add(toEntry[i]);
|
||||
}
|
||||
}
|
||||
for (int i = limit; i < docCount; i++) {
|
||||
w.add(0);
|
||||
}
|
||||
w.finish();
|
||||
}
|
||||
|
||||
protected void writeIndex(IndexOutput idxOut, int docCount,
|
||||
long maxValue, long[] addresses, int[] toEntry) throws IOException {
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
PackedInts.bitsRequired(maxValue));
|
||||
final int limit = docCount > docToEntry.length ? docToEntry.length
|
||||
: docCount;
|
||||
assert toEntry.length >= limit -1;
|
||||
if (addresses != null) {
|
||||
for (int i = 0; i < limit; i++) {
|
||||
assert addresses[toEntry[i]] >= 0;
|
||||
w.add(addresses[toEntry[i]]);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < limit; i++) {
|
||||
assert toEntry[i] >= 0;
|
||||
w.add(toEntry[i]);
|
||||
}
|
||||
}
|
||||
for (int i = limit; i < docCount; i++) {
|
||||
w.add(0);
|
||||
}
|
||||
w.finish();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
abstract static class DerefBytesEnumBase extends ValuesEnum {
|
||||
private final PackedInts.ReaderIterator idx;
|
||||
private final int valueCount;
|
||||
private int pos = -1;
|
||||
protected final IndexInput datIn;
|
||||
protected final long fp;
|
||||
protected final int size;
|
||||
|
||||
protected DerefBytesEnumBase(AttributeSource source, IndexInput datIn,
|
||||
IndexInput idxIn, int size, ValueType enumType) throws IOException {
|
||||
super(source, enumType);
|
||||
this.datIn = datIn;
|
||||
this.size = size;
|
||||
idx = PackedInts.getReaderIterator(idxIn);
|
||||
fp = datIn.getFilePointer();
|
||||
if (size > 0) {
|
||||
bytesRef.grow(this.size);
|
||||
bytesRef.length = this.size;
|
||||
}
|
||||
bytesRef.offset = 0;
|
||||
valueCount = idx.size();
|
||||
}
|
||||
|
||||
protected void copyFrom(ValuesEnum valuesEnum) {
|
||||
bytesRef = valuesEnum.bytesRef;
|
||||
if (bytesRef.bytes.length < size) {
|
||||
bytesRef.grow(size);
|
||||
}
|
||||
bytesRef.length = size;
|
||||
bytesRef.offset = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target < valueCount) {
|
||||
long address;
|
||||
while ((address = idx.advance(target)) == 0) {
|
||||
if (++target >= valueCount) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
pos = idx.ord();
|
||||
fill(address, bytesRef);
|
||||
return pos;
|
||||
}
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= valueCount) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
datIn.close();
|
||||
} finally {
|
||||
idx.close();
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract void fill(long address, BytesRef ref) throws IOException;
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -19,26 +19,17 @@ package org.apache.lucene.index.values;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.values.Bytes.BytesBaseSource;
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesEnumBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ByteBlockPool.Allocator;
|
||||
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
|
||||
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// Stores fixed-length byte[] by deref, ie when two docs
|
||||
// have the same value, they store only 1 byte[]
|
||||
|
@ -51,135 +42,55 @@ class FixedDerefBytesImpl {
|
|||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
static class Writer extends BytesWriterBase {
|
||||
private int size = -1;
|
||||
private int[] docToID;
|
||||
private final BytesRefHash hash;
|
||||
public static class Writer extends DerefBytesWriterBase {
|
||||
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
|
||||
throws IOException {
|
||||
this(dir, id, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
|
||||
bytesUsed, context);
|
||||
}
|
||||
|
||||
public Writer(Directory dir, String id, Allocator allocator,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
hash = new BytesRefHash(new ByteBlockPool(allocator),
|
||||
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
|
||||
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
|
||||
docToID = new int[1];
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, BytesRef bytes) throws IOException {
|
||||
if (bytes.length == 0) // default value - skip it
|
||||
return;
|
||||
if (size == -1) {
|
||||
size = bytes.length;
|
||||
} else if (bytes.length != size) {
|
||||
throw new IllegalArgumentException("expected bytes size=" + size
|
||||
+ " but got " + bytes.length);
|
||||
}
|
||||
int ord = hash.add(bytes);
|
||||
if (ord < 0) {
|
||||
ord = (-ord) - 1;
|
||||
}
|
||||
if (docID >= docToID.length) {
|
||||
final int size = docToID.length;
|
||||
docToID = ArrayUtil.grow(docToID, 1 + docID);
|
||||
bytesUsed.addAndGet((docToID.length - size)
|
||||
* RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
docToID[docID] = 1 + ord;
|
||||
}
|
||||
|
||||
// Important that we get docCount, in case there were
|
||||
// some last docs that we didn't see
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
boolean success = false;
|
||||
protected void finishInternal(int docCount) throws IOException {
|
||||
final int numValues = hash.size();
|
||||
final IndexOutput datOut = getDataOut();
|
||||
try {
|
||||
datOut.writeInt(size);
|
||||
if (size != -1) {
|
||||
final BytesRef bytesRef = new BytesRef(size);
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
hash.get(i, bytesRef);
|
||||
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
|
||||
}
|
||||
final IndexOutput datOut = getOrCreateDataOut();
|
||||
datOut.writeInt(size);
|
||||
if (size != -1) {
|
||||
final BytesRef bytesRef = new BytesRef(size);
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
hash.get(i, bytesRef);
|
||||
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(datOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(datOut);
|
||||
}
|
||||
hash.close();
|
||||
}
|
||||
success = false;
|
||||
final IndexOutput idxOut = getIndexOut();
|
||||
try {
|
||||
final int count = 1 + numValues;
|
||||
idxOut.writeInt(count - 1);
|
||||
// write index
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
PackedInts.bitsRequired(count - 1));
|
||||
final int limit = docCount > docToID.length ? docToID.length : docCount;
|
||||
for (int i = 0; i < limit; i++) {
|
||||
w.add(docToID[i]);
|
||||
}
|
||||
// fill up remaining doc with zeros
|
||||
for (int i = limit; i < docCount; i++) {
|
||||
w.add(0);
|
||||
}
|
||||
w.finish();
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(idxOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(idxOut);
|
||||
}
|
||||
bytesUsed
|
||||
.addAndGet((-docToID.length) * RamUsageEstimator.NUM_BYTES_INT);
|
||||
docToID = null;
|
||||
}
|
||||
final IndexOutput idxOut = getOrCreateIndexOut();
|
||||
idxOut.writeInt(numValues);
|
||||
writeIndex(idxOut, docCount, numValues, docToEntry);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
private final int size;
|
||||
|
||||
private final int numValuesStored;
|
||||
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context);
|
||||
size = datIn.readInt();
|
||||
numValuesStored = idxIn.readInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
final IndexInput index = cloneIndex();
|
||||
return new Source(cloneData(), index, size, index.readInt());
|
||||
return new Source(cloneData(), cloneIndex(), size, numValuesStored);
|
||||
}
|
||||
|
||||
private static class Source extends BytesBaseSource {
|
||||
private final PackedInts.Reader index;
|
||||
private static final class Source extends DerefBytesSourceBase {
|
||||
private final int size;
|
||||
private final int numValues;
|
||||
|
||||
protected Source(IndexInput datIn, IndexInput idxIn, int size,
|
||||
int numValues) throws IOException {
|
||||
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues);
|
||||
protected Source(IndexInput datIn, IndexInput idxIn, int size, long numValues) throws IOException {
|
||||
super(datIn, idxIn, size * numValues, ValueType.BYTES_FIXED_DEREF);
|
||||
this.size = size;
|
||||
this.numValues = numValues;
|
||||
index = PackedInts.getReader(idxIn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
final int id = (int) index.get(docID);
|
||||
final int id = (int) addresses.get(docID);
|
||||
if (id == 0) {
|
||||
bytesRef.length = 0;
|
||||
return bytesRef;
|
||||
|
@ -187,95 +98,18 @@ class FixedDerefBytesImpl {
|
|||
return data.fillSlice(bytesRef, ((id - 1) * size), size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return numValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_FIXED_DEREF;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int maxDoc() {
|
||||
return index.size();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
|
||||
return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
|
||||
}
|
||||
|
||||
static class DerefBytesEnum extends ValuesEnum {
|
||||
protected final IndexInput datIn;
|
||||
private final PackedInts.ReaderIterator idx;
|
||||
protected final long fp;
|
||||
private final int size;
|
||||
private final int valueCount;
|
||||
private int pos = -1;
|
||||
final static class DerefBytesEnum extends DerefBytesEnumBase {
|
||||
|
||||
public DerefBytesEnum(AttributeSource source, IndexInput datIn,
|
||||
IndexInput idxIn, int size) throws IOException {
|
||||
this(source, datIn, idxIn, size, ValueType.BYTES_FIXED_DEREF);
|
||||
}
|
||||
|
||||
protected DerefBytesEnum(AttributeSource source, IndexInput datIn,
|
||||
IndexInput idxIn, int size, ValueType enumType) throws IOException {
|
||||
super(source, enumType);
|
||||
this.datIn = datIn;
|
||||
this.size = size;
|
||||
idxIn.readInt();// read valueCount
|
||||
idx = PackedInts.getReaderIterator(idxIn);
|
||||
fp = datIn.getFilePointer();
|
||||
if (size > 0) {
|
||||
bytesRef.grow(this.size);
|
||||
bytesRef.length = this.size;
|
||||
}
|
||||
bytesRef.offset = 0;
|
||||
valueCount = idx.size();
|
||||
}
|
||||
|
||||
protected void copyFrom(ValuesEnum valuesEnum) {
|
||||
bytesRef = valuesEnum.bytesRef;
|
||||
if (bytesRef.bytes.length < size) {
|
||||
bytesRef.grow(size);
|
||||
}
|
||||
bytesRef.length = size;
|
||||
bytesRef.offset = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target < valueCount) {
|
||||
long address;
|
||||
while ((address = idx.advance(target)) == 0) {
|
||||
if (++target >= valueCount) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
pos = idx.ord();
|
||||
fill(address, bytesRef);
|
||||
return pos;
|
||||
}
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= valueCount) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
datIn.close();
|
||||
} finally {
|
||||
idx.close();
|
||||
}
|
||||
super(source, datIn, idxIn, size, ValueType.BYTES_FIXED_DEREF);
|
||||
}
|
||||
|
||||
protected void fill(long address, BytesRef ref) throws IOException {
|
||||
|
@ -284,12 +118,6 @@ class FixedDerefBytesImpl {
|
|||
ref.length = size;
|
||||
ref.offset = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,28 +20,17 @@ package org.apache.lucene.index.values;
|
|||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.index.values.Bytes.BytesBaseSortedSource;
|
||||
import org.apache.lucene.index.values.Bytes.BytesSortedSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
|
||||
import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ByteBlockPool.Allocator;
|
||||
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
|
||||
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// Stores fixed-length byte[] by deref, ie when two docs
|
||||
// have the same value, they store only 1 byte[]
|
||||
|
@ -55,132 +44,49 @@ class FixedSortedBytesImpl {
|
|||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
static class Writer extends BytesWriterBase {
|
||||
private int size = -1;
|
||||
private int[] docToEntry;
|
||||
static class Writer extends DerefBytesWriterBase {
|
||||
private final Comparator<BytesRef> comp;
|
||||
private final BytesRefHash hash;
|
||||
|
||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
this(dir, id, comp, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
|
||||
bytesUsed, context);
|
||||
}
|
||||
|
||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||
Allocator allocator, Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
ByteBlockPool pool = new ByteBlockPool(allocator);
|
||||
hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY,
|
||||
new TrackingDirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY,
|
||||
bytesUsed));
|
||||
docToEntry = new int[1];
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||
this.comp = comp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, BytesRef bytes) throws IOException {
|
||||
if (bytes.length == 0)
|
||||
return; // default - skip it
|
||||
if (size == -1) {
|
||||
size = bytes.length;
|
||||
} else if (bytes.length != size) {
|
||||
throw new IllegalArgumentException("expected bytes size=" + size
|
||||
+ " but got " + bytes.length);
|
||||
}
|
||||
if (docID >= docToEntry.length) {
|
||||
final int[] newArray = new int[ArrayUtil.oversize(1 + docID,
|
||||
RamUsageEstimator.NUM_BYTES_INT)];
|
||||
System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
|
||||
bytesUsed.addAndGet((newArray.length - docToEntry.length)
|
||||
* RamUsageEstimator.NUM_BYTES_INT);
|
||||
docToEntry = newArray;
|
||||
}
|
||||
int e = hash.add(bytes);
|
||||
docToEntry[docID] = 1 + (e < 0 ? (-e) - 1 : e);
|
||||
}
|
||||
|
||||
// Important that we get docCount, in case there were
|
||||
// some last docs that we didn't see
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
final IndexOutput datOut = getDataOut();
|
||||
boolean success = false;
|
||||
public void finishInternal(int docCount) throws IOException {
|
||||
final IndexOutput datOut = getOrCreateDataOut();
|
||||
final int count = hash.size();
|
||||
final int[] address = new int[count];
|
||||
|
||||
try {
|
||||
datOut.writeInt(size);
|
||||
if (size != -1) {
|
||||
final int[] sortedEntries = hash.sort(comp);
|
||||
// first dump bytes data, recording address as we go
|
||||
final BytesRef bytesRef = new BytesRef(size);
|
||||
for (int i = 0; i < count; i++) {
|
||||
final int e = sortedEntries[i];
|
||||
final BytesRef bytes = hash.get(e, bytesRef);
|
||||
assert bytes.length == size;
|
||||
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
|
||||
address[e] = 1 + i;
|
||||
}
|
||||
final int[] address = new int[count+1]; // addr 0 is default values
|
||||
datOut.writeInt(size);
|
||||
if (size != -1) {
|
||||
final int[] sortedEntries = hash.sort(comp);
|
||||
// first dump bytes data, recording address as we go
|
||||
final BytesRef bytesRef = new BytesRef(size);
|
||||
for (int i = 0; i < count; i++) {
|
||||
final int e = sortedEntries[i];
|
||||
final BytesRef bytes = hash.get(e, bytesRef);
|
||||
assert bytes.length == size;
|
||||
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
|
||||
address[e + 1] = 1 + i;
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(datOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(datOut);
|
||||
}
|
||||
hash.close();
|
||||
}
|
||||
final IndexOutput idxOut = getIndexOut();
|
||||
success = false;
|
||||
try {
|
||||
idxOut.writeInt(count);
|
||||
// next write index
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
PackedInts.bitsRequired(count));
|
||||
final int limit;
|
||||
if (docCount > docToEntry.length) {
|
||||
limit = docToEntry.length;
|
||||
} else {
|
||||
limit = docCount;
|
||||
}
|
||||
for (int i = 0; i < limit; i++) {
|
||||
final int e = docToEntry[i];
|
||||
if (e == 0) {
|
||||
// null is encoded as zero
|
||||
w.add(0);
|
||||
} else {
|
||||
assert e > 0 && e <= count : "index must 0 > && <= " + count
|
||||
+ " was: " + e;
|
||||
w.add(address[e - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = limit; i < docCount; i++) {
|
||||
w.add(0);
|
||||
}
|
||||
w.finish();
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(idxOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(idxOut);
|
||||
}
|
||||
bytesUsed.addAndGet((-docToEntry.length)
|
||||
* RamUsageEstimator.NUM_BYTES_INT);
|
||||
docToEntry = null;
|
||||
}
|
||||
final IndexOutput idxOut = getOrCreateIndexOut();
|
||||
idxOut.writeInt(count);
|
||||
writeIndex(idxOut, docCount, count, address, docToEntry);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
private final int size;
|
||||
private final int numValuesStored;
|
||||
|
||||
public Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context);
|
||||
size = datIn.readInt();
|
||||
numValuesStored = idxIn.readInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -192,58 +98,35 @@ class FixedSortedBytesImpl {
|
|||
@Override
|
||||
public SortedSource loadSorted(Comparator<BytesRef> comp)
|
||||
throws IOException {
|
||||
final IndexInput idxInput = cloneIndex();
|
||||
final IndexInput datInput = cloneData();
|
||||
datInput.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
|
||||
idxInput.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
return new Source(datInput, idxInput, size, idxInput.readInt(), comp);
|
||||
return new Source(cloneData(), cloneIndex(), size, numValuesStored, comp);
|
||||
}
|
||||
|
||||
private static class Source extends BytesBaseSortedSource {
|
||||
|
||||
private final PackedInts.Reader index;
|
||||
private final int numValue;
|
||||
private static class Source extends BytesSortedSourceBase {
|
||||
private final int valueCount;
|
||||
private final int size;
|
||||
|
||||
public Source(IndexInput datIn, IndexInput idxIn, int size,
|
||||
int numValues, Comparator<BytesRef> comp) throws IOException {
|
||||
super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), size
|
||||
* numValues);
|
||||
super(datIn, idxIn, comp, size * numValues, ValueType.BYTES_FIXED_SORTED);
|
||||
this.size = size;
|
||||
this.numValue = numValues;
|
||||
index = PackedInts.getReader(idxIn);
|
||||
this.valueCount = numValues;
|
||||
closeIndexInput();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int ord(int docID) {
|
||||
return (int) index.get(docID) -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getByValue(BytesRef bytes, BytesRef tmpRef) {
|
||||
return binarySearch(bytes, tmpRef, 0, numValue - 1);
|
||||
return binarySearch(bytes, tmpRef, 0, valueCount - 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return numValue;
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BytesRef deref(int ord, BytesRef bytesRef) {
|
||||
return data.fillSlice(bytesRef, (ord * size), size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_FIXED_SORTED;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int maxDoc() {
|
||||
return index.size();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,7 +21,7 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.values.Bytes.BytesBaseSource;
|
||||
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -46,26 +46,24 @@ class FixedStraightBytesImpl {
|
|||
static final String CODEC_NAME = "FixedStraightBytes";
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
static class Writer extends BytesWriterBase {
|
||||
private int size = -1;
|
||||
|
||||
static abstract class FixedBytesWriterBase extends BytesWriterBase {
|
||||
protected int lastDocID = -1;
|
||||
// start at -1 if the first added value is > 0
|
||||
private int lastDocID = -1;
|
||||
protected int size = -1;
|
||||
private final int byteBlockSize = BYTE_BLOCK_SIZE;
|
||||
private final ByteBlockPool pool;
|
||||
private boolean merge;
|
||||
private final int byteBlockSize;
|
||||
private IndexOutput datOut;
|
||||
|
||||
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
protected FixedBytesWriterBase(Directory dir, String id, String codecName,
|
||||
int version, Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, codecName, version, bytesUsed, context);
|
||||
pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
|
||||
byteBlockSize = BYTE_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void add(int docID, BytesRef bytes) throws IOException {
|
||||
assert lastDocID < docID;
|
||||
assert !merge;
|
||||
|
||||
if (size == -1) {
|
||||
if (bytes.length > BYTE_BLOCK_SIZE) {
|
||||
throw new IllegalArgumentException("bytes arrays > " + Short.MAX_VALUE + " are not supported");
|
||||
|
@ -84,7 +82,6 @@ class FixedStraightBytesImpl {
|
|||
}
|
||||
|
||||
private final void advancePool(int docID) {
|
||||
assert !merge;
|
||||
long numBytes = (docID - (lastDocID+1))*size;
|
||||
while(numBytes > 0) {
|
||||
if (numBytes + pool.byteUpto < byteBlockSize) {
|
||||
|
@ -97,14 +94,50 @@ class FixedStraightBytesImpl {
|
|||
}
|
||||
assert numBytes == 0;
|
||||
}
|
||||
|
||||
protected void set(BytesRef ref, int docId) {
|
||||
assert BYTE_BLOCK_SIZE % size == 0 : "BYTE_BLOCK_SIZE ("+ BYTE_BLOCK_SIZE + ") must be a multiple of the size: " + size;
|
||||
ref.offset = docId*size;
|
||||
ref.length = size;
|
||||
pool.deref(ref);
|
||||
}
|
||||
|
||||
protected void resetPool() {
|
||||
pool.dropBuffersAndReset();
|
||||
}
|
||||
|
||||
protected void writeData(IndexOutput out) throws IOException {
|
||||
pool.writePool(out);
|
||||
}
|
||||
|
||||
protected void writeZeros(int num, IndexOutput out) throws IOException {
|
||||
final byte[] zeros = new byte[size];
|
||||
for (int i = 0; i < num; i++) {
|
||||
out.writeBytes(zeros, zeros.length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class Writer extends FixedBytesWriterBase {
|
||||
private boolean merge;
|
||||
private IndexOutput datOut;
|
||||
|
||||
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
}
|
||||
|
||||
public Writer(Directory dir, String id, String codecName, int version, Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, codecName, version, bytesUsed, context);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void merge(MergeState state) throws IOException {
|
||||
merge = true;
|
||||
datOut = getDataOut();
|
||||
datOut = getOrCreateDataOut();
|
||||
boolean success = false;
|
||||
try {
|
||||
if (state.liveDocs == null && state.reader instanceof Reader) {
|
||||
if (state.liveDocs == null && state.reader instanceof Reader ) {
|
||||
Reader reader = (Reader) state.reader;
|
||||
final int maxDocs = reader.maxDoc;
|
||||
if (maxDocs == 0) {
|
||||
|
@ -113,7 +146,10 @@ class FixedStraightBytesImpl {
|
|||
if (size == -1) {
|
||||
size = reader.size;
|
||||
datOut.writeInt(size);
|
||||
}
|
||||
} else if (size != reader.size) {
|
||||
throw new IllegalArgumentException("expected bytes size=" + size
|
||||
+ " but got " + reader.size);
|
||||
}
|
||||
if (lastDocID+1 < state.docBase) {
|
||||
fill(datOut, state.docBase);
|
||||
lastDocID = state.docBase-1;
|
||||
|
@ -137,7 +173,7 @@ class FixedStraightBytesImpl {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
assert lastDocID < docID;
|
||||
|
@ -158,11 +194,7 @@ class FixedStraightBytesImpl {
|
|||
// Fills up to but not including this docID
|
||||
private void fill(IndexOutput datOut, int docID) throws IOException {
|
||||
assert size >= 0;
|
||||
final long numBytes = (docID - (lastDocID+1))*size;
|
||||
final byte zero = 0;
|
||||
for (long i = 0; i < numBytes; i++) {
|
||||
datOut.writeByte(zero);
|
||||
}
|
||||
writeZeros((docID - (lastDocID+1)), datOut);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -172,12 +204,12 @@ class FixedStraightBytesImpl {
|
|||
if (!merge) {
|
||||
// indexing path - no disk IO until here
|
||||
assert datOut == null;
|
||||
datOut = getDataOut();
|
||||
datOut = getOrCreateDataOut();
|
||||
if (size == -1) {
|
||||
datOut.writeInt(0);
|
||||
} else {
|
||||
datOut.writeInt(size);
|
||||
pool.writePool(datOut);
|
||||
writeData(datOut);
|
||||
}
|
||||
if (lastDocID + 1 < docCount) {
|
||||
fill(datOut, docCount);
|
||||
|
@ -193,7 +225,7 @@ class FixedStraightBytesImpl {
|
|||
}
|
||||
success = true;
|
||||
} finally {
|
||||
pool.dropBuffersAndReset();
|
||||
resetPool();
|
||||
if (success) {
|
||||
IOUtils.close(datOut);
|
||||
} else {
|
||||
|
@ -201,14 +233,19 @@ class FixedStraightBytesImpl {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
private final int size;
|
||||
private final int maxDoc;
|
||||
|
||||
protected final int size;
|
||||
protected final int maxDoc;
|
||||
|
||||
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, false, context);
|
||||
this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context);
|
||||
}
|
||||
|
||||
protected Reader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, codec, version, false, context);
|
||||
size = datIn.readInt();
|
||||
this.maxDoc = maxDoc;
|
||||
}
|
||||
|
@ -271,13 +308,13 @@ class FixedStraightBytesImpl {
|
|||
|
||||
}
|
||||
|
||||
private static class StraightBytesSource extends BytesBaseSource {
|
||||
private final static class StraightBytesSource extends BytesSourceBase {
|
||||
private final int size;
|
||||
private final int maxDoc;
|
||||
|
||||
public StraightBytesSource(IndexInput datIn, int size, int maxDoc)
|
||||
throws IOException {
|
||||
super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc);
|
||||
super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc, ValueType.BYTES_FIXED_STRAIGHT);
|
||||
this.size = size;
|
||||
this.maxDoc = maxDoc;
|
||||
}
|
||||
|
@ -292,11 +329,6 @@ class FixedStraightBytesImpl {
|
|||
return maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_FIXED_STRAIGHT;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int maxDoc() {
|
||||
return maxDoc;
|
||||
|
@ -308,66 +340,68 @@ class FixedStraightBytesImpl {
|
|||
return new FixedStraightBytesEnum(source, cloneData(), size, maxDoc);
|
||||
}
|
||||
|
||||
private static final class FixedStraightBytesEnum extends ValuesEnum {
|
||||
private final IndexInput datIn;
|
||||
private final int size;
|
||||
private final int maxDoc;
|
||||
private int pos = -1;
|
||||
private final long fp;
|
||||
|
||||
public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn,
|
||||
int size, int maxDoc) throws IOException {
|
||||
super(source, ValueType.BYTES_FIXED_STRAIGHT);
|
||||
this.datIn = datIn;
|
||||
this.size = size;
|
||||
this.maxDoc = maxDoc;
|
||||
bytesRef.grow(size);
|
||||
bytesRef.length = size;
|
||||
bytesRef.offset = 0;
|
||||
fp = datIn.getFilePointer();
|
||||
}
|
||||
|
||||
protected void copyFrom(ValuesEnum valuesEnum) {
|
||||
bytesRef = valuesEnum.bytesRef;
|
||||
if (bytesRef.bytes.length < size) {
|
||||
bytesRef.grow(size);
|
||||
}
|
||||
bytesRef.length = size;
|
||||
bytesRef.offset = 0;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
datIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= maxDoc || size == 0) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
if ((target - 1) != pos) // pos inc == 1
|
||||
datIn.seek(fp + target * size);
|
||||
datIn.readBytes(bytesRef.bytes, 0, size);
|
||||
return pos = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_FIXED_STRAIGHT;
|
||||
}
|
||||
}
|
||||
|
||||
static class FixedStraightBytesEnum extends ValuesEnum {
|
||||
private final IndexInput datIn;
|
||||
private final int size;
|
||||
private final int maxDoc;
|
||||
private int pos = -1;
|
||||
private final long fp;
|
||||
|
||||
public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn,
|
||||
int size, int maxDoc) throws IOException {
|
||||
super(source, ValueType.BYTES_FIXED_STRAIGHT);
|
||||
this.datIn = datIn;
|
||||
this.size = size;
|
||||
this.maxDoc = maxDoc;
|
||||
bytesRef.grow(size);
|
||||
bytesRef.length = size;
|
||||
bytesRef.offset = 0;
|
||||
fp = datIn.getFilePointer();
|
||||
}
|
||||
|
||||
protected void copyFrom(ValuesEnum valuesEnum) {
|
||||
super.copyFrom(valuesEnum);
|
||||
if (bytesRef.bytes.length < size) {
|
||||
bytesRef.grow(size);
|
||||
}
|
||||
bytesRef.length = size;
|
||||
bytesRef.offset = 0;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
datIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= maxDoc || size == 0) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
if ((target - 1) != pos) // pos inc == 1
|
||||
datIn.seek(fp + target * size);
|
||||
datIn.readBytes(bytesRef.bytes, 0, size);
|
||||
return pos = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,21 +17,14 @@ package org.apache.lucene.index.values;
|
|||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.FloatsRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
|
||||
|
@ -43,11 +36,6 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public class Floats {
|
||||
// TODO - add bulk copy where possible
|
||||
private static final String CODEC_NAME = "SimpleFloats";
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
private static final byte[] DEFAULTS = new byte[] {0,0,0,0,0,0,0,0};
|
||||
|
||||
public static Writer getWriter(Directory dir, String id, int precisionBytes,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
|
@ -55,493 +43,73 @@ public class Floats {
|
|||
throw new IllegalArgumentException("precisionBytes must be 4 or 8; got "
|
||||
+ precisionBytes);
|
||||
}
|
||||
if (precisionBytes == 4) {
|
||||
return new Float4Writer(dir, id, bytesUsed, context);
|
||||
} else {
|
||||
return new Float8Writer(dir, id, bytesUsed, context);
|
||||
}
|
||||
return new FloatsWriter(dir, id, bytesUsed, context, precisionBytes);
|
||||
|
||||
}
|
||||
|
||||
public static IndexDocValues getValues(Directory dir, String id, int maxDoc, IOContext context)
|
||||
throws IOException {
|
||||
return new FloatsReader(dir, id, maxDoc, context);
|
||||
}
|
||||
|
||||
abstract static class FloatsWriter extends Writer {
|
||||
private final String id;
|
||||
protected FloatsRef floatsRef;
|
||||
protected int lastDocId = -1;
|
||||
protected IndexOutput datOut;
|
||||
private final byte precision;
|
||||
private final Directory dir;
|
||||
private final IOContext context;
|
||||
|
||||
protected FloatsWriter(Directory dir, String id, int precision,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(bytesUsed);
|
||||
this.id = id;
|
||||
this.precision = (byte) precision;
|
||||
this.dir = dir;
|
||||
this.context = context;
|
||||
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
|
||||
final static class FloatsWriter extends FixedStraightBytesImpl.Writer {
|
||||
private final int size;
|
||||
public FloatsWriter(Directory dir, String id, Counter bytesUsed,
|
||||
IOContext context, int size) throws IOException {
|
||||
super(dir, id, bytesUsed, context);
|
||||
this.bytesRef = new BytesRef(size);
|
||||
this.size = size;
|
||||
bytesRef.length = size;
|
||||
}
|
||||
|
||||
final void initDataOut() throws IOException {
|
||||
assert datOut == null;
|
||||
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
|
||||
Writer.DATA_EXTENSION), context);
|
||||
boolean success = false;
|
||||
try {
|
||||
CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
|
||||
assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME);
|
||||
datOut.writeByte(this.precision);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(datOut);
|
||||
}
|
||||
public void add(int docID, double v) throws IOException {
|
||||
if (size == 8) {
|
||||
bytesRef.copy(Double.doubleToRawLongBits(v));
|
||||
} else {
|
||||
bytesRef.copy(Float.floatToRawIntBits((float)v));
|
||||
}
|
||||
add(docID, bytesRef);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
add(docID, floatsRef.get());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void add(int docID, PerDocFieldValues docValues) throws IOException {
|
||||
add(docID, docValues.getFloat());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setNextEnum(ValuesEnum valuesEnum) {
|
||||
floatsRef = valuesEnum.getFloat();
|
||||
}
|
||||
|
||||
protected final int fillDefault(int numValues) throws IOException {
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
datOut.writeBytes(DEFAULTS, precision);
|
||||
}
|
||||
return numValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void merge(MergeState state) throws IOException {
|
||||
if (datOut == null) {
|
||||
initDataOut();
|
||||
}
|
||||
if (state.liveDocs == null && state.reader instanceof FloatsReader) {
|
||||
// no deletes - bulk copy
|
||||
final FloatsReader reader = (FloatsReader) state.reader;
|
||||
assert reader.precisionBytes == (int) precision;
|
||||
if (reader.maxDoc == 0)
|
||||
return;
|
||||
final int docBase = state.docBase;
|
||||
if (docBase - lastDocId > 1) {
|
||||
// fill with default values
|
||||
lastDocId += fillDefault(docBase - lastDocId - 1);
|
||||
}
|
||||
lastDocId += reader.transferTo(datOut);
|
||||
|
||||
final static class FloatsReader extends FixedStraightBytesImpl.Reader {
|
||||
final IndexDocValuesArray arrayTemplate;
|
||||
FloatsReader(Directory dir, String id, int maxDoc, IOContext context)
|
||||
throws IOException {
|
||||
super(dir, id, maxDoc, context);
|
||||
assert size == 4 || size == 8;
|
||||
if (size == 4) {
|
||||
arrayTemplate = new IndexDocValuesArray.FloatValues();
|
||||
} else {
|
||||
super.merge(state);
|
||||
arrayTemplate = new IndexDocValuesArray.DoubleValues();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void files(Collection<String> files) throws IOException {
|
||||
files.add(IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION));
|
||||
}
|
||||
}
|
||||
|
||||
// Writes 4 bytes (float) per value
|
||||
static final class Float4Writer extends FloatsWriter {
|
||||
private int[] values;
|
||||
protected Float4Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
|
||||
throws IOException {
|
||||
super(dir, id, 4, bytesUsed, context);
|
||||
values = new int[1];
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(final int docID, final double v)
|
||||
throws IOException {
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
if (docID >= values.length) {
|
||||
final long len = values.length;
|
||||
values = ArrayUtil.grow(values, 1 + docID);
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
|
||||
* ((values.length) - len));
|
||||
}
|
||||
values[docID] = Float.floatToRawIntBits((float)v);
|
||||
lastDocId = docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
assert datOut != null;
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
if (docID - lastDocId > 1) {
|
||||
// fill with default values
|
||||
fillDefault(docID - lastDocId - 1);
|
||||
}
|
||||
assert datOut != null;
|
||||
datOut.writeInt(Float.floatToRawIntBits((float) floatsRef.get()));
|
||||
lastDocId = docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
boolean success = false;
|
||||
try {
|
||||
int numDefaultsToAppend = docCount - (lastDocId + 1);
|
||||
if (datOut == null) {
|
||||
initDataOut();
|
||||
for (int i = 0; i <= lastDocId; i++) {
|
||||
datOut.writeInt(values[i]);
|
||||
}
|
||||
}
|
||||
fillDefault(numDefaultsToAppend);
|
||||
success = true;
|
||||
} finally {
|
||||
bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_INT
|
||||
* ((values.length))));
|
||||
values = null;
|
||||
if (success) {
|
||||
IOUtils.close(datOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(datOut);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Writes 8 bytes (double) per value
|
||||
static final class Float8Writer extends FloatsWriter {
|
||||
private long[] values;
|
||||
protected Float8Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
|
||||
throws IOException {
|
||||
super(dir, id, 8, bytesUsed, context);
|
||||
values = new long[1];
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, double v) throws IOException {
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
if (docID >= values.length) {
|
||||
final long len = values.length;
|
||||
values = ArrayUtil.grow(values, 1 + docID);
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG
|
||||
* ((values.length) - len));
|
||||
}
|
||||
values[docID] = Double.doubleToLongBits(v);
|
||||
lastDocId = docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
if (docID - lastDocId > 1) {
|
||||
// fill with default values
|
||||
lastDocId += fillDefault(docID - lastDocId - 1);
|
||||
}
|
||||
assert datOut != null;
|
||||
datOut.writeLong(Double.doubleToRawLongBits((float) floatsRef.get()));
|
||||
lastDocId = docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
boolean success = false;
|
||||
public Source load() throws IOException {
|
||||
final IndexInput indexInput = cloneData();
|
||||
try {
|
||||
int numDefaultsToAppend = docCount - (lastDocId + 1);
|
||||
if (datOut == null) {
|
||||
initDataOut();
|
||||
for (int i = 0; i <= lastDocId; i++) {
|
||||
datOut.writeLong(values[i]);
|
||||
}
|
||||
}
|
||||
fillDefault(numDefaultsToAppend);
|
||||
success = true;
|
||||
} finally {
|
||||
bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG
|
||||
* ((values.length))));
|
||||
values = null;
|
||||
if (success) {
|
||||
IOUtils.close(datOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(datOut);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens all necessary files, but does not read any data in until you call
|
||||
* {@link #load}.
|
||||
*/
|
||||
static class FloatsReader extends IndexDocValues {
|
||||
|
||||
private final IndexInput datIn;
|
||||
private final int precisionBytes;
|
||||
// TODO(simonw) is ByteBuffer the way to go here?
|
||||
private final int maxDoc;
|
||||
|
||||
protected FloatsReader(Directory dir, String id, int maxDoc, IOContext context)
|
||||
throws IOException {
|
||||
datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
|
||||
Writer.DATA_EXTENSION), context);
|
||||
CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
precisionBytes = datIn.readByte();
|
||||
assert precisionBytes == 4 || precisionBytes == 8;
|
||||
this.maxDoc = maxDoc;
|
||||
}
|
||||
|
||||
int transferTo(IndexOutput out) throws IOException {
|
||||
IndexInput indexInput = (IndexInput) datIn.clone();
|
||||
try {
|
||||
indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
// skip precision:
|
||||
indexInput.readByte();
|
||||
out.copyBytes(indexInput, precisionBytes * maxDoc);
|
||||
return arrayTemplate.newFromInput(indexInput, maxDoc);
|
||||
} finally {
|
||||
indexInput.close();
|
||||
}
|
||||
return maxDoc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the actual values. You may call this more than once, eg if you
|
||||
* already previously loaded but then discarded the Source.
|
||||
*/
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
/* we always read BIG_ENDIAN here since the writer uses
|
||||
* DataOutput#writeInt() / writeLong() we can simply read the ints / longs
|
||||
* back in using readInt / readLong */
|
||||
final IndexInput indexInput = (IndexInput) datIn.clone();
|
||||
indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
// skip precision:
|
||||
indexInput.readByte();
|
||||
if (precisionBytes == 4) {
|
||||
final float[] values = new float[(4 * maxDoc) >> 2];
|
||||
assert values.length == maxDoc;
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = Float.intBitsToFloat(indexInput.readInt());
|
||||
}
|
||||
return new Source4(values);
|
||||
} else {
|
||||
final double[] values = new double[(8 * maxDoc) >> 3];
|
||||
assert values.length == maxDoc;
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = Double.longBitsToDouble(indexInput.readLong());
|
||||
}
|
||||
return new Source8(values);
|
||||
}
|
||||
}
|
||||
|
||||
private final class Source4 extends Source {
|
||||
private final float[] values;
|
||||
|
||||
Source4(final float[] values ) throws IOException {
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getFloat(int docID) {
|
||||
return values[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource)
|
||||
throws IOException {
|
||||
return new SourceEnum(attrSource, ValueType.FLOAT_32, this, maxDoc) {
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs)
|
||||
return pos = NO_MORE_DOCS;
|
||||
floatsRef.floats[floatsRef.offset] = source.getFloat(target);
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getArray() {
|
||||
return this.values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasArray() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.FLOAT_32;
|
||||
}
|
||||
}
|
||||
|
||||
private final class Source8 extends Source {
|
||||
private final double[] values;
|
||||
|
||||
Source8(final double[] values) throws IOException {
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getFloat(int docID) {
|
||||
return values[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource)
|
||||
throws IOException {
|
||||
return new SourceEnum(attrSource, type(), this, maxDoc) {
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs)
|
||||
return pos = NO_MORE_DOCS;
|
||||
floatsRef.floats[floatsRef.offset] = source.getFloat(target);
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.FLOAT_64;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getArray() {
|
||||
return this.values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasArray() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
datIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
IndexInput indexInput = (IndexInput) datIn.clone();
|
||||
indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
// skip precision:
|
||||
indexInput.readByte();
|
||||
return precisionBytes == 4 ? new Floats4Enum(source, indexInput, maxDoc)
|
||||
: new Floats8EnumImpl(source, indexInput, maxDoc);
|
||||
return arrayTemplate.getDirectEnum(source, indexInput, maxDoc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return precisionBytes == 4 ? ValueType.FLOAT_32
|
||||
: ValueType.FLOAT_64;
|
||||
return arrayTemplate.type();
|
||||
}
|
||||
}
|
||||
|
||||
static final class Floats4Enum extends FloatsEnumImpl {
|
||||
|
||||
Floats4Enum(AttributeSource source, IndexInput dataIn, int maxDoc)
|
||||
throws IOException {
|
||||
super(source, dataIn, 4, maxDoc, ValueType.FLOAT_32);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= maxDoc)
|
||||
return pos = NO_MORE_DOCS;
|
||||
dataIn.seek(fp + (target * precision));
|
||||
final int intBits = dataIn.readInt();
|
||||
floatsRef.floats[0] = Float.intBitsToFloat(intBits);
|
||||
floatsRef.offset = 0;
|
||||
return pos = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class Floats8EnumImpl extends FloatsEnumImpl {
|
||||
|
||||
Floats8EnumImpl(AttributeSource source, IndexInput dataIn, int maxDoc)
|
||||
throws IOException {
|
||||
super(source, dataIn, 8, maxDoc, ValueType.FLOAT_64);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
dataIn.seek(fp + (target * precision));
|
||||
final long value = dataIn.readLong();
|
||||
floatsRef.floats[floatsRef.offset] = Double.longBitsToDouble(value);
|
||||
return pos = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
}
|
||||
|
||||
static abstract class FloatsEnumImpl extends ValuesEnum {
|
||||
protected final IndexInput dataIn;
|
||||
protected int pos = -1;
|
||||
protected final int precision;
|
||||
protected final int maxDoc;
|
||||
protected final long fp;
|
||||
|
||||
FloatsEnumImpl(AttributeSource source, IndexInput dataIn, int precision,
|
||||
int maxDoc, ValueType type) throws IOException {
|
||||
super(source, precision == 4 ? ValueType.FLOAT_32
|
||||
: ValueType.FLOAT_64);
|
||||
this.dataIn = dataIn;
|
||||
this.precision = precision;
|
||||
this.maxDoc = maxDoc;
|
||||
fp = dataIn.getFilePointer();
|
||||
floatsRef.offset = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
dataIn.close();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -2,14 +2,12 @@ package org.apache.lucene.index.values;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.values.FixedStraightBytesImpl.FixedStraightBytesEnum;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
|
@ -34,48 +32,33 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
*/
|
||||
abstract class IndexDocValuesArray extends Source {
|
||||
|
||||
private final Counter bytesUsed;
|
||||
private final int bytesPerValue;
|
||||
private int size = 0;
|
||||
protected final int bytesPerValue;
|
||||
private final ValueType type;
|
||||
private final boolean isFloat;
|
||||
protected int maxDocID = -1;
|
||||
|
||||
IndexDocValuesArray(Counter bytesUsed, int bytesPerValue, ValueType type) {
|
||||
this.bytesUsed = bytesUsed;
|
||||
IndexDocValuesArray(int bytesPerValue, ValueType type) {
|
||||
this.bytesPerValue = bytesPerValue;
|
||||
this.type = type;
|
||||
}
|
||||
switch (type) {
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
case FIXED_INTS_64:
|
||||
case FIXED_INTS_8:
|
||||
isFloat = false;
|
||||
break;
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
isFloat = true;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("illegal type: " + type);
|
||||
|
||||
void set(int docId, long value) {
|
||||
if (docId >= size) {
|
||||
adjustSize(grow(docId + 1));
|
||||
}
|
||||
if (docId > maxDocID) {
|
||||
maxDocID = docId;
|
||||
}
|
||||
setInternal(docId, value);
|
||||
}
|
||||
|
||||
protected final void adjustSize(int newSize) {
|
||||
bytesUsed.addAndGet(bytesPerValue * (newSize - size));
|
||||
size = newSize;
|
||||
}
|
||||
|
||||
void clear() {
|
||||
adjustSize(0);
|
||||
maxDocID = -1;
|
||||
size = 0;
|
||||
}
|
||||
|
||||
protected abstract void writeDirect(IndexOutput out, long value) throws IOException;
|
||||
|
||||
protected abstract void writeDefaults(IndexOutput out, int num) throws IOException;
|
||||
|
||||
protected abstract void setInternal(int docId, long value);
|
||||
|
||||
protected abstract int grow(int numDocs);
|
||||
|
||||
abstract void write(IndexOutput output, int numDocs) throws IOException;
|
||||
public abstract IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public final int getValueCount() {
|
||||
|
@ -89,21 +72,38 @@ abstract class IndexDocValuesArray extends Source {
|
|||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
|
||||
if (isFloat) {
|
||||
return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
floatsRef.floats[intsRef.offset] = IndexDocValuesArray.this
|
||||
.getFloat(target);
|
||||
return pos = target;
|
||||
}
|
||||
intsRef.ints[intsRef.offset] = IndexDocValuesArray.this.getInt(target);
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
};
|
||||
} else {
|
||||
return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
intsRef.ints[intsRef.offset] = IndexDocValuesArray.this
|
||||
.getInt(target);
|
||||
return pos = target;
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
abstract ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input, int maxDoc)
|
||||
throws IOException;
|
||||
abstract ValuesEnum getDirectEnum(AttributeSource attrSource,
|
||||
IndexInput input, int maxDoc) throws IOException;
|
||||
|
||||
@Override
|
||||
public final boolean hasArray() {
|
||||
|
@ -111,17 +111,16 @@ abstract class IndexDocValuesArray extends Source {
|
|||
}
|
||||
|
||||
final static class ByteValues extends IndexDocValuesArray {
|
||||
private byte[] values;
|
||||
private final byte[] values;
|
||||
|
||||
ByteValues(Counter bytesUsed) {
|
||||
super(bytesUsed, 1, ValueType.FIXED_INTS_8);
|
||||
ByteValues() {
|
||||
super(1, ValueType.FIXED_INTS_8);
|
||||
values = new byte[0];
|
||||
}
|
||||
|
||||
ByteValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(Counter.newCounter(), 1, ValueType.FIXED_INTS_8);
|
||||
private ByteValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(1, ValueType.FIXED_INTS_8);
|
||||
values = new byte[numDocs];
|
||||
adjustSize(numDocs);
|
||||
input.readBytes(values, 0, values.length, false);
|
||||
maxDocID = numDocs - 1;
|
||||
}
|
||||
|
@ -138,69 +137,37 @@ abstract class IndexDocValuesArray extends Source {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void setInternal(int docId, long value) {
|
||||
values[docId] = (byte) (0xFFL & value);
|
||||
}
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FixedIntsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
|
||||
@Override
|
||||
protected int grow(int numDocs) {
|
||||
values = ArrayUtil.grow(values, numDocs);
|
||||
return values.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
void write(IndexOutput output, int numDocs) throws IOException {
|
||||
assert maxDocID + 1 <= numDocs;
|
||||
output.writeBytes(values, 0, maxDocID + 1);
|
||||
writeDefaults(output, numDocs - (maxDocID+1));
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input, int maxDoc)
|
||||
throws IOException {
|
||||
return new FixedIntsEnumImpl(attrSource, input, type(), maxDoc) {
|
||||
@Override
|
||||
protected void fillNext(LongsRef ref, IndexInput dataIn)
|
||||
throws IOException {
|
||||
ref.ints[ref.offset] = dataIn.readByte();
|
||||
protected final long toLong(BytesRef bytesRef) {
|
||||
return bytesRef.bytes[bytesRef.offset];
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
void clear() {
|
||||
super.clear();
|
||||
values = new byte[0];
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
return new ByteValues(input, numDocs);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeDefaults(IndexOutput out, int num) throws IOException {
|
||||
final byte zero = 0;
|
||||
for (int i = 0; i < num; i++) {
|
||||
out.writeByte(zero);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeDirect(IndexOutput out, long value) throws IOException {
|
||||
out.writeByte((byte) (0xFFL & value));
|
||||
}
|
||||
};
|
||||
|
||||
final static class ShortValues extends IndexDocValuesArray {
|
||||
private short[] values;
|
||||
private final short[] values;
|
||||
|
||||
ShortValues(Counter bytesUsed) {
|
||||
super(bytesUsed, RamUsageEstimator.NUM_BYTES_SHORT,
|
||||
ValueType.FIXED_INTS_16);
|
||||
ShortValues() {
|
||||
super(RamUsageEstimator.NUM_BYTES_SHORT, ValueType.FIXED_INTS_16);
|
||||
values = new short[0];
|
||||
}
|
||||
|
||||
ShortValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(Counter.newCounter(), RamUsageEstimator.NUM_BYTES_SHORT,
|
||||
ValueType.FIXED_INTS_16);
|
||||
private ShortValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(RamUsageEstimator.NUM_BYTES_SHORT, ValueType.FIXED_INTS_16);
|
||||
values = new short[numDocs];
|
||||
adjustSize(numDocs);
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = input.readShort();
|
||||
}
|
||||
|
@ -219,71 +186,37 @@ abstract class IndexDocValuesArray extends Source {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void setInternal(int docId, long value) {
|
||||
values[docId] = (short) (0xFFFFL & value);
|
||||
}
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FixedIntsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
|
||||
@Override
|
||||
protected int grow(int numDocs) {
|
||||
values = ArrayUtil.grow(values, numDocs);
|
||||
return values.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
void write(IndexOutput output, int numDocs) throws IOException {
|
||||
assert maxDocID + 1 <= numDocs;
|
||||
for (int i = 0; i < maxDocID + 1; i++) {
|
||||
output.writeShort(values[i]);
|
||||
}
|
||||
writeDefaults(output, numDocs - (maxDocID+1));
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input, int maxDoc)
|
||||
throws IOException {
|
||||
return new FixedIntsEnumImpl(attrSource, input, type(), maxDoc) {
|
||||
@Override
|
||||
protected void fillNext(LongsRef ref, IndexInput dataIn)
|
||||
throws IOException {
|
||||
ref.ints[ref.offset] = dataIn.readShort();
|
||||
protected final long toLong(BytesRef bytesRef) {
|
||||
return bytesRef.asShort();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
void clear() {
|
||||
super.clear();
|
||||
values = new short[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeDefaults(IndexOutput out, int num) throws IOException {
|
||||
final short zero = 0;
|
||||
for (int i = 0; i < num; i++) {
|
||||
out.writeShort(zero);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeDirect(IndexOutput out, long value) throws IOException {
|
||||
out.writeShort((short) (0xFFFFL & value));
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
return new ShortValues(input, numDocs);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
final static class IntValues extends IndexDocValuesArray {
|
||||
private int[] values;
|
||||
private final int[] values;
|
||||
|
||||
IntValues(Counter bytesUsed) {
|
||||
super(bytesUsed, RamUsageEstimator.NUM_BYTES_INT, ValueType.FIXED_INTS_32);
|
||||
IntValues() {
|
||||
super(RamUsageEstimator.NUM_BYTES_INT, ValueType.FIXED_INTS_32);
|
||||
values = new int[0];
|
||||
}
|
||||
|
||||
IntValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(Counter.newCounter(), RamUsageEstimator.NUM_BYTES_INT,
|
||||
ValueType.FIXED_INTS_32);
|
||||
private IntValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(RamUsageEstimator.NUM_BYTES_INT, ValueType.FIXED_INTS_32);
|
||||
values = new int[numDocs];
|
||||
adjustSize(numDocs);
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = input.readInt();
|
||||
}
|
||||
|
@ -302,71 +235,36 @@ abstract class IndexDocValuesArray extends Source {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void setInternal(int docId, long value) {
|
||||
values[docId] = (int) (0xFFFFFFFF & value);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int grow(int numDocs) {
|
||||
values = ArrayUtil.grow(values, numDocs);
|
||||
return values.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
void write(IndexOutput output, int numDocs) throws IOException {
|
||||
assert maxDocID + 1 <= numDocs;
|
||||
for (int i = 0; i < maxDocID + 1; i++) {
|
||||
output.writeInt(values[i]);
|
||||
}
|
||||
writeDefaults(output, numDocs - (maxDocID+1));
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input, int maxDoc)
|
||||
throws IOException {
|
||||
return new FixedIntsEnumImpl(attrSource, input, type(), maxDoc) {
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FixedIntsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
@Override
|
||||
protected void fillNext(LongsRef ref, IndexInput dataIn)
|
||||
throws IOException {
|
||||
ref.ints[ref.offset] = dataIn.readInt();
|
||||
protected final long toLong(BytesRef bytesRef) {
|
||||
return bytesRef.asInt();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
void clear() {
|
||||
super.clear();
|
||||
values = new int[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeDefaults(IndexOutput out, int num) throws IOException {
|
||||
for (int i = 0; i < num; i++) {
|
||||
out.writeInt(0);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeDirect(IndexOutput out, long value) throws IOException {
|
||||
out.writeInt((int) (0xFFFFFFFFL & value));
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
return new IntValues(input, numDocs);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
final static class LongValues extends IndexDocValuesArray {
|
||||
private long[] values;
|
||||
private final long[] values;
|
||||
|
||||
LongValues(Counter bytesUsed) {
|
||||
super(bytesUsed, RamUsageEstimator.NUM_BYTES_LONG,
|
||||
ValueType.FIXED_INTS_64);
|
||||
LongValues() {
|
||||
super(RamUsageEstimator.NUM_BYTES_LONG, ValueType.FIXED_INTS_64);
|
||||
values = new long[0];
|
||||
}
|
||||
|
||||
LongValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(Counter.newCounter(), RamUsageEstimator.NUM_BYTES_LONG,
|
||||
ValueType.FIXED_INTS_64);
|
||||
private LongValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(RamUsageEstimator.NUM_BYTES_LONG, ValueType.FIXED_INTS_64);
|
||||
values = new long[numDocs];
|
||||
adjustSize(numDocs);
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = input.readLong();
|
||||
}
|
||||
|
@ -385,122 +283,179 @@ abstract class IndexDocValuesArray extends Source {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void setInternal(int docId, long value) {
|
||||
values[docId] = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int grow(int numDocs) {
|
||||
values = ArrayUtil.grow(values, numDocs);
|
||||
return values.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
void write(IndexOutput output, int numDocs) throws IOException {
|
||||
assert maxDocID + 1 <= numDocs;
|
||||
for (int i = 0; i < maxDocID + 1; i++) {
|
||||
output.writeLong(values[i]);
|
||||
}
|
||||
writeDefaults(output, numDocs - (maxDocID+1));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input, int maxDoc)
|
||||
throws IOException {
|
||||
return new FixedIntsEnumImpl(attrSource, input, type(), maxDoc) {
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FixedIntsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
@Override
|
||||
protected void fillNext(LongsRef ref, IndexInput dataIn)
|
||||
throws IOException {
|
||||
ref.ints[ref.offset] = dataIn.readLong();
|
||||
protected final long toLong(BytesRef bytesRef) {
|
||||
return bytesRef.asLong();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
void clear() {
|
||||
super.clear();
|
||||
values = new long[0];
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
return new LongValues(input, numDocs);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
final static class FloatValues extends IndexDocValuesArray {
|
||||
private final float[] values;
|
||||
|
||||
FloatValues() {
|
||||
super(RamUsageEstimator.NUM_BYTES_FLOAT, ValueType.FLOAT_32);
|
||||
values = new float[0];
|
||||
}
|
||||
|
||||
private FloatValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(RamUsageEstimator.NUM_BYTES_FLOAT, ValueType.FLOAT_32);
|
||||
values = new float[numDocs];
|
||||
/* we always read BIG_ENDIAN here since the writer serialized plain bytes
|
||||
* we can simply read the ints / longs
|
||||
* back in using readInt / readLong */
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = Float.intBitsToFloat(input.readInt());
|
||||
}
|
||||
maxDocID = numDocs - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeDefaults(IndexOutput out, int num) throws IOException {
|
||||
for (int i = 0; i < num; i++) {
|
||||
out.writeLong(0l);
|
||||
}
|
||||
public float[] getArray() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeDirect(IndexOutput out, long value) throws IOException {
|
||||
out.writeLong(value);
|
||||
public double getFloat(int docID) {
|
||||
assert docID >= 0 && docID < values.length;
|
||||
return values[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FloatsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
@Override
|
||||
protected double toDouble(BytesRef bytesRef) {
|
||||
return Float.intBitsToFloat(bytesRef.asInt());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
return new FloatValues(input, numDocs);
|
||||
}
|
||||
};
|
||||
|
||||
final static class DoubleValues extends IndexDocValuesArray {
|
||||
private final double[] values;
|
||||
|
||||
DoubleValues() {
|
||||
super(RamUsageEstimator.NUM_BYTES_DOUBLE, ValueType.FLOAT_64);
|
||||
values = new double[0];
|
||||
}
|
||||
|
||||
private DoubleValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(RamUsageEstimator.NUM_BYTES_DOUBLE, ValueType.FLOAT_64);
|
||||
values = new double[numDocs];
|
||||
/* we always read BIG_ENDIAN here since the writer serialized plain bytes
|
||||
* we can simply read the ints / longs
|
||||
* back in using readInt / readLong */
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = Double.longBitsToDouble(input.readLong());
|
||||
}
|
||||
maxDocID = numDocs - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double[] getArray() {
|
||||
return values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getFloat(int docID) {
|
||||
assert docID >= 0 && docID < values.length;
|
||||
return values[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FloatsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
@Override
|
||||
protected double toDouble(BytesRef bytesRef) {
|
||||
return Double.longBitsToDouble(bytesRef.asLong());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
return new DoubleValues(input, numDocs);
|
||||
}
|
||||
};
|
||||
|
||||
private abstract static class FixedIntsEnumImpl extends ValuesEnum {
|
||||
private final IndexInput dataIn;
|
||||
private final int maxDoc;
|
||||
private final int sizeInByte;
|
||||
private int pos = -1;
|
||||
private abstract static class FixedIntsEnum extends
|
||||
FixedStraightBytesEnum {
|
||||
private final ValueType type;
|
||||
|
||||
private FixedIntsEnumImpl(AttributeSource source, IndexInput dataIn,
|
||||
ValueType type, int maxDoc) throws IOException {
|
||||
super(source, type);
|
||||
switch (type) {
|
||||
case FIXED_INTS_16:
|
||||
sizeInByte = 2;
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
sizeInByte = 4;
|
||||
break;
|
||||
case FIXED_INTS_64:
|
||||
sizeInByte = 8;
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
sizeInByte = 1;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("type " + type
|
||||
+ " is not a fixed int type");
|
||||
}
|
||||
intsRef.offset = 0;
|
||||
this.dataIn = dataIn;
|
||||
this.maxDoc = maxDoc;
|
||||
private FixedIntsEnum(AttributeSource source, IndexInput dataIn,
|
||||
ValueType type, int bytesPerValue, int maxDoc) throws IOException {
|
||||
super(source, dataIn, bytesPerValue, maxDoc);
|
||||
this.type = type;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
dataIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
final int advance = super.advance(target);
|
||||
if (advance != NO_MORE_DOCS) {
|
||||
intsRef.ints[0] = toLong(this.bytesRef);
|
||||
}
|
||||
assert target > pos;
|
||||
if (target > pos + 1) {
|
||||
dataIn
|
||||
.seek(dataIn.getFilePointer() + ((target - pos - 1) * sizeInByte));
|
||||
}
|
||||
fillNext(intsRef, dataIn);
|
||||
return pos = target;
|
||||
return advance;
|
||||
}
|
||||
|
||||
protected abstract void fillNext(LongsRef ref, IndexInput input)
|
||||
throws IOException;
|
||||
|
||||
protected abstract long toLong(BytesRef bytesRef);
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
|
||||
private abstract static class FloatsEnum extends FixedStraightBytesEnum {
|
||||
|
||||
private final ValueType type;
|
||||
FloatsEnum(AttributeSource source, IndexInput dataIn, ValueType type, int bytePerValue, int maxDoc)
|
||||
throws IOException {
|
||||
super(source, dataIn, bytePerValue, maxDoc);
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
final int retVal = super.advance(target);
|
||||
if (retVal != NO_MORE_DOCS) {
|
||||
floatsRef.floats[floatsRef.offset] = toDouble(bytesRef);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
protected abstract double toDouble(BytesRef bytesRef);
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -19,28 +19,169 @@ package org.apache.lucene.index.values;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.values.IntsImpl.IntsReader;
|
||||
import org.apache.lucene.index.values.IntsImpl.IntsWriter;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.ByteValues;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.IntValues;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.ShortValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Stores ints packed and fixed with fixed-bit precision.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class Ints {
|
||||
// TODO - add bulk copy where possible
|
||||
public final class Ints {
|
||||
|
||||
private Ints() {
|
||||
}
|
||||
|
||||
public static Writer getWriter(Directory dir, String id,
|
||||
Counter bytesUsed, ValueType type, IOContext context) throws IOException {
|
||||
return new IntsWriter(dir, id, bytesUsed, type, context);
|
||||
public static Writer getWriter(Directory dir, String id, Counter bytesUsed,
|
||||
ValueType type, IOContext context) throws IOException {
|
||||
return type == ValueType.VAR_INTS ? new PackedIntValues.PackedIntsWriter(dir, id,
|
||||
bytesUsed, context) : new IntsWriter(dir, id, bytesUsed, context, type);
|
||||
}
|
||||
|
||||
public static IndexDocValues getValues(Directory dir, String id,
|
||||
int numDocs, IOContext context) throws IOException {
|
||||
return new IntsReader(dir, id, numDocs, context);
|
||||
public static IndexDocValues getValues(Directory dir, String id, int numDocs,
|
||||
ValueType type, IOContext context) throws IOException {
|
||||
return type == ValueType.VAR_INTS ? new PackedIntValues.PackedIntsReader(dir, id,
|
||||
numDocs, context) : new IntsReader(dir, id, numDocs, context);
|
||||
}
|
||||
|
||||
static class IntsWriter extends FixedStraightBytesImpl.Writer {
|
||||
protected static final String CODEC_NAME = "Ints";
|
||||
protected static final int VERSION_START = 0;
|
||||
protected static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
private final ValueType valueType;
|
||||
|
||||
public IntsWriter(Directory dir, String id, Counter bytesUsed,
|
||||
IOContext context, ValueType valueType) throws IOException {
|
||||
this(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, valueType);
|
||||
}
|
||||
|
||||
protected IntsWriter(Directory dir, String id, String codecName,
|
||||
int version, Counter bytesUsed, IOContext context, ValueType valueType) throws IOException {
|
||||
super(dir, id, codecName, version, bytesUsed, context);
|
||||
this.valueType = valueType;
|
||||
final int expectedSize = getSize(valueType);
|
||||
this.bytesRef = new BytesRef(expectedSize);
|
||||
bytesRef.length = expectedSize;
|
||||
}
|
||||
|
||||
private static int getSize(ValueType type) {
|
||||
switch (type) {
|
||||
case FIXED_INTS_16:
|
||||
return 2;
|
||||
case FIXED_INTS_32:
|
||||
return 4;
|
||||
case FIXED_INTS_64:
|
||||
return 8;
|
||||
case FIXED_INTS_8:
|
||||
return 1;
|
||||
default:
|
||||
throw new IllegalStateException("illegal type " + type);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, long v) throws IOException {
|
||||
switch (valueType) {
|
||||
case FIXED_INTS_64:
|
||||
bytesRef.copy(v);
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
bytesRef.copy((int) (0xFFFFFFFF & v));
|
||||
break;
|
||||
case FIXED_INTS_16:
|
||||
bytesRef.copy((short) (0xFFFFL & v));
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
bytesRef.bytes[0] = (byte) (0xFFL & v);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("illegal type " + valueType);
|
||||
}
|
||||
|
||||
add(docID, bytesRef);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, PerDocFieldValues docValues) throws IOException {
|
||||
add(docID, docValues.getInt());
|
||||
}
|
||||
}
|
||||
|
||||
final static class IntsReader extends FixedStraightBytesImpl.Reader {
|
||||
private final ValueType type;
|
||||
private final IndexDocValuesArray arrayTemplate;
|
||||
|
||||
IntsReader(Directory dir, String id, int maxDoc, IOContext context)
|
||||
throws IOException {
|
||||
super(dir, id, IntsWriter.CODEC_NAME, IntsWriter.VERSION_CURRENT, maxDoc,
|
||||
context);
|
||||
switch (size) {
|
||||
case 8:
|
||||
type = ValueType.FIXED_INTS_64;
|
||||
arrayTemplate = new LongValues();
|
||||
break;
|
||||
case 4:
|
||||
type = ValueType.FIXED_INTS_32;
|
||||
arrayTemplate = new IntValues();
|
||||
break;
|
||||
case 2:
|
||||
type = ValueType.FIXED_INTS_16;
|
||||
arrayTemplate = new ShortValues();
|
||||
break;
|
||||
case 1:
|
||||
type = ValueType.FIXED_INTS_8;
|
||||
arrayTemplate = new ByteValues();
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("illegal size: " + size);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
boolean success = false;
|
||||
IndexInput input = null;
|
||||
try {
|
||||
input = cloneData();
|
||||
final Source source = arrayTemplate.newFromInput(input, maxDoc);
|
||||
success = true;
|
||||
return source;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(input, datIn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
final IndexInput input = cloneData();
|
||||
boolean success = false;
|
||||
try {
|
||||
final ValuesEnum valuesEnum = arrayTemplate.getDirectEnum(source,
|
||||
input, maxDoc);
|
||||
success = true;
|
||||
return valuesEnum;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(input);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,515 +0,0 @@
|
|||
package org.apache.lucene.index.values;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.ByteValues;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.IntValues;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.ShortValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
* Stores ints packed and fixed with fixed-bit precision.
|
||||
*
|
||||
* @lucene.experimental
|
||||
* */
|
||||
class IntsImpl {
|
||||
|
||||
private static final String CODEC_NAME = "Ints";
|
||||
private static final byte PACKED = 0x00;
|
||||
private static final byte FIXED_64 = 0x01;
|
||||
private static final byte FIXED_32 = 0x02;
|
||||
private static final byte FIXED_16 = 0x03;
|
||||
private static final byte FIXED_8 = 0x04;
|
||||
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
static class IntsWriter extends Writer {
|
||||
|
||||
private LongsRef intsRef;
|
||||
private final IndexDocValuesArray array;
|
||||
private long minValue;
|
||||
private long maxValue;
|
||||
private boolean started;
|
||||
private final String id;
|
||||
private int lastDocId = -1;
|
||||
private final Directory dir;
|
||||
private final byte typeOrd;
|
||||
private IndexOutput datOut;
|
||||
private boolean merging;
|
||||
private final IOContext context;
|
||||
|
||||
|
||||
protected IntsWriter(Directory dir, String id, Counter bytesUsed,
|
||||
ValueType valueType, IOContext context) throws IOException {
|
||||
super(bytesUsed);
|
||||
this.context = context;
|
||||
this.dir = dir;
|
||||
this.id = id;
|
||||
switch (valueType) {
|
||||
case FIXED_INTS_16:
|
||||
array= new ShortValues(bytesUsed);
|
||||
typeOrd = FIXED_16;
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
array = new IntValues(bytesUsed);
|
||||
typeOrd = FIXED_32;
|
||||
break;
|
||||
case FIXED_INTS_64:
|
||||
array = new LongValues(bytesUsed);
|
||||
typeOrd = FIXED_64;
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
array = new ByteValues(bytesUsed);
|
||||
typeOrd = FIXED_8;
|
||||
break;
|
||||
case VAR_INTS:
|
||||
array = new LongValues(bytesUsed);
|
||||
typeOrd = PACKED;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("unknown type " + valueType);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, long v) throws IOException {
|
||||
assert lastDocId < docID;
|
||||
if (!started) {
|
||||
started = true;
|
||||
minValue = maxValue = v;
|
||||
} else {
|
||||
if (v < minValue) {
|
||||
minValue = v;
|
||||
} else if (v > maxValue) {
|
||||
maxValue = v;
|
||||
}
|
||||
}
|
||||
lastDocId = docID;
|
||||
array.set(docID, v);
|
||||
}
|
||||
|
||||
private final void initDataOut(byte typeOrd) throws IOException {
|
||||
if (datOut == null) {
|
||||
boolean success = false;
|
||||
try {
|
||||
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
|
||||
DATA_EXTENSION), context);
|
||||
CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
|
||||
datOut.writeByte(typeOrd);
|
||||
success = true;
|
||||
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(datOut);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
boolean success = false;
|
||||
try {
|
||||
if (datOut == null) {
|
||||
// if we only add or merge Packed ints datOut is not initialized
|
||||
assert !merging || typeOrd == PACKED;
|
||||
finishAdd(docCount);
|
||||
} else {
|
||||
assert datOut != null && merging && typeOrd != PACKED;
|
||||
// on merge, simply fill up missing values
|
||||
fillDefault(datOut, docCount - (lastDocId + 1));
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(datOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(datOut);
|
||||
}
|
||||
array.clear();
|
||||
}
|
||||
}
|
||||
|
||||
private final void finishAdd(int docCount) throws IOException {
|
||||
if (!started) {
|
||||
minValue = maxValue = 0;
|
||||
}
|
||||
byte headerType = typeOrd;
|
||||
if (typeOrd == PACKED) {
|
||||
final long delta = maxValue - minValue;
|
||||
// if we exceed the range of positive longs we must switch to fixed
|
||||
// ints
|
||||
if (delta <= (maxValue >= 0 && minValue <= 0 ? Long.MAX_VALUE
|
||||
: Long.MAX_VALUE - 1) && delta >= 0) {
|
||||
writePackedInts(docCount);
|
||||
return; // done
|
||||
} else {
|
||||
headerType = FIXED_64;
|
||||
}
|
||||
}
|
||||
initDataOut(headerType);
|
||||
array.write(datOut, docCount);
|
||||
assert datOut != null;
|
||||
}
|
||||
// TODO how can we improve VAR_INT merging here without violating compression?
|
||||
@Override
|
||||
protected void merge(MergeState state) throws IOException {
|
||||
merging = true;
|
||||
if (typeOrd != PACKED) {
|
||||
initDataOut(typeOrd); // init datOut since we merge directly
|
||||
if (state.liveDocs == null && state.reader instanceof IntsReader) {
|
||||
// no deleted docs - try bulk copy
|
||||
final IntsReader reader = (IntsReader) state.reader;
|
||||
if (reader.type == typeOrd) {
|
||||
final int docBase = state.docBase;
|
||||
if (docBase - lastDocId > 1) {
|
||||
// fill with default values
|
||||
lastDocId += fillDefault(datOut, docBase - lastDocId - 1);
|
||||
}
|
||||
lastDocId += reader.transferTo(datOut);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
super.merge(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
assert merging;
|
||||
final long value = intsRef.get();
|
||||
if (typeOrd != PACKED) {
|
||||
// if not packed we do straight merging and write values directly
|
||||
assert datOut != null;
|
||||
if (docID - lastDocId > 1) {
|
||||
// fill with default values
|
||||
array.writeDefaults(datOut, docID - lastDocId - 1);
|
||||
}
|
||||
array.writeDirect(datOut, value);
|
||||
lastDocId = docID;
|
||||
} else {
|
||||
add(docID, value);
|
||||
}
|
||||
}
|
||||
|
||||
protected final int fillDefault(IndexOutput datOut, int numValues) throws IOException {
|
||||
array.writeDefaults(datOut, numValues);
|
||||
return numValues;
|
||||
}
|
||||
|
||||
private void writePackedInts(int docCount) throws IOException {
|
||||
initDataOut(PACKED);
|
||||
datOut.writeLong(minValue);
|
||||
assert array.type() == ValueType.FIXED_INTS_64;
|
||||
final long[] docToValue = (long[])array.getArray();
|
||||
// write a default value to recognize docs without a value for that
|
||||
// field
|
||||
final long defaultValue = maxValue >= 0 && minValue <= 0 ? 0 - minValue
|
||||
: ++maxValue - minValue;
|
||||
datOut.writeLong(defaultValue);
|
||||
PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
|
||||
PackedInts.bitsRequired(maxValue - minValue));
|
||||
final int limit = docToValue.length > docCount ? docCount
|
||||
: docToValue.length;
|
||||
for (int i = 0; i < limit; i++) {
|
||||
w.add(docToValue[i] == 0 ? defaultValue : docToValue[i] - minValue);
|
||||
}
|
||||
for (int i = limit; i < docCount; i++) {
|
||||
w.add(defaultValue);
|
||||
}
|
||||
|
||||
w.finish();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setNextEnum(ValuesEnum valuesEnum) {
|
||||
intsRef = valuesEnum.getInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, PerDocFieldValues docValues) throws IOException {
|
||||
add(docID, docValues.getInt());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void files(Collection<String> files) throws IOException {
|
||||
files.add(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens all necessary files, but does not read any data in until you call
|
||||
* {@link #load}.
|
||||
*/
|
||||
static class IntsReader extends IndexDocValues {
|
||||
private final IndexInput datIn;
|
||||
private final byte type;
|
||||
private final int numDocs;
|
||||
|
||||
protected IntsReader(Directory dir, String id, int numDocs, IOContext context) throws IOException {
|
||||
datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
|
||||
Writer.DATA_EXTENSION), context);
|
||||
this.numDocs = numDocs;
|
||||
boolean success = false;
|
||||
try {
|
||||
CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
type = datIn.readByte();
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(datIn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int transferTo(IndexOutput datOut) throws IOException {
|
||||
IndexInput indexInput = (IndexInput) datIn.clone();
|
||||
boolean success = false;
|
||||
try {
|
||||
indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
// skip type
|
||||
indexInput.readByte();
|
||||
datOut.copyBytes(indexInput, bytesPerValue(type) * numDocs);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(indexInput);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(indexInput);
|
||||
}
|
||||
}
|
||||
return numDocs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the actual values. You may call this more than once, eg if you
|
||||
* already previously loaded but then discarded the Source.
|
||||
*/
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
boolean success = false;
|
||||
final Source source;
|
||||
IndexInput input = null;
|
||||
try {
|
||||
input = (IndexInput) datIn.clone();
|
||||
input.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
|
||||
source = loadFixedSource(type, input, numDocs);
|
||||
success = true;
|
||||
return source;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(input, datIn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
datIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
final IndexInput input = (IndexInput) datIn.clone();
|
||||
boolean success = false;
|
||||
try {
|
||||
input.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
|
||||
final ValuesEnum inst = directEnum(type, source, input, numDocs);
|
||||
success = true;
|
||||
return inst;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(input);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.VAR_INTS;
|
||||
}
|
||||
}
|
||||
|
||||
private static ValuesEnum directEnum(byte ord, AttributeSource attrSource, IndexInput input, int numDocs) throws IOException {
|
||||
switch (ord) {
|
||||
case FIXED_16:
|
||||
return new ShortValues((Counter)null).getDirectEnum(attrSource, input, numDocs);
|
||||
case FIXED_32:
|
||||
return new IntValues((Counter)null).getDirectEnum(attrSource, input, numDocs);
|
||||
case FIXED_64:
|
||||
return new LongValues((Counter)null).getDirectEnum(attrSource, input, numDocs);
|
||||
case FIXED_8:
|
||||
return new ByteValues((Counter)null).getDirectEnum(attrSource, input, numDocs);
|
||||
case PACKED:
|
||||
return new PackedIntsEnumImpl(attrSource, input);
|
||||
default:
|
||||
throw new IllegalStateException("unknown type ordinal " + ord);
|
||||
}
|
||||
}
|
||||
|
||||
private static IndexDocValues.Source loadFixedSource(byte ord, IndexInput input, int numDoc) throws IOException {
|
||||
switch (ord) {
|
||||
case FIXED_16:
|
||||
return new ShortValues(input, numDoc);
|
||||
case FIXED_32:
|
||||
return new IntValues(input, numDoc);
|
||||
case FIXED_64:
|
||||
return new LongValues(input, numDoc);
|
||||
case FIXED_8:
|
||||
return new ByteValues(input, numDoc);
|
||||
case PACKED:
|
||||
return new PackedIntsSource(input);
|
||||
default:
|
||||
throw new IllegalStateException("unknown type ordinal " + ord);
|
||||
}
|
||||
}
|
||||
|
||||
private static int bytesPerValue(byte typeOrd) {
|
||||
final int numBytes;
|
||||
switch (typeOrd) {
|
||||
case FIXED_16:
|
||||
numBytes = 2;
|
||||
break;
|
||||
case FIXED_32:
|
||||
numBytes = 4;
|
||||
break;
|
||||
case FIXED_64:
|
||||
numBytes = 8;
|
||||
break;
|
||||
case FIXED_8:
|
||||
numBytes = 1;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("illegal type ord " + typeOrd);
|
||||
}
|
||||
return numBytes;
|
||||
}
|
||||
|
||||
static class PackedIntsSource extends Source {
|
||||
private final long minValue;
|
||||
private final long defaultValue;
|
||||
private final PackedInts.Reader values;
|
||||
|
||||
public PackedIntsSource(IndexInput dataIn) throws IOException {
|
||||
|
||||
minValue = dataIn.readLong();
|
||||
defaultValue = dataIn.readLong();
|
||||
values = PackedInts.getReader(dataIn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getInt(int docID) {
|
||||
// TODO -- can we somehow avoid 2X method calls
|
||||
// on each get? must push minValue down, and make
|
||||
// PackedInts implement Ints.Source
|
||||
assert docID >= 0;
|
||||
final long value = values.get(docID);
|
||||
return value == defaultValue ? 0 : minValue + value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
return new SourceEnum(attrSource, type(), this, values.size()) {
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs)
|
||||
return pos = NO_MORE_DOCS;
|
||||
intsRef.ints[intsRef.offset] = source.getInt(target);
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.VAR_INTS;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static final class PackedIntsEnumImpl extends ValuesEnum {
|
||||
private final PackedInts.ReaderIterator ints;
|
||||
private long minValue;
|
||||
private final IndexInput dataIn;
|
||||
private final long defaultValue;
|
||||
private final int maxDoc;
|
||||
private int pos = -1;
|
||||
|
||||
private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
|
||||
throws IOException {
|
||||
super(source, ValueType.VAR_INTS);
|
||||
intsRef.offset = 0;
|
||||
this.dataIn = dataIn;
|
||||
minValue = dataIn.readLong();
|
||||
defaultValue = dataIn.readLong();
|
||||
this.ints = PackedInts.getReaderIterator(dataIn);
|
||||
maxDoc = ints.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
ints.close();
|
||||
dataIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
final long val = ints.advance(target);
|
||||
intsRef.ints[intsRef.offset] = val == defaultValue ? 0 : minValue + val;
|
||||
return pos = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -0,0 +1,335 @@
|
|||
package org.apache.lucene.index.values;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.values.FixedStraightBytesImpl.FixedBytesWriterBase;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
* Stores integers using {@link PackedInts}
|
||||
*
|
||||
* @lucene.experimental
|
||||
* */
|
||||
class PackedIntValues {
|
||||
|
||||
private static final String CODEC_NAME = "PackedInts";
|
||||
private static final byte PACKED = 0x00;
|
||||
private static final byte FIXED_64 = 0x01;
|
||||
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
static class PackedIntsWriter extends FixedBytesWriterBase {
|
||||
|
||||
private LongsRef intsRef;
|
||||
private long minValue;
|
||||
private long maxValue;
|
||||
private boolean started;
|
||||
private int lastDocId = -1;
|
||||
|
||||
protected PackedIntsWriter(Directory dir, String id, Counter bytesUsed,
|
||||
IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
bytesRef = new BytesRef(8);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, long v) throws IOException {
|
||||
assert lastDocId < docID;
|
||||
if (!started) {
|
||||
started = true;
|
||||
minValue = maxValue = v;
|
||||
} else {
|
||||
if (v < minValue) {
|
||||
minValue = v;
|
||||
} else if (v > maxValue) {
|
||||
maxValue = v;
|
||||
}
|
||||
}
|
||||
lastDocId = docID;
|
||||
bytesRef.copy(v);
|
||||
add(docID, bytesRef);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
boolean success = false;
|
||||
final IndexOutput dataOut = getOrCreateDataOut();
|
||||
try {
|
||||
if (!started) {
|
||||
minValue = maxValue = 0;
|
||||
}
|
||||
final long delta = maxValue - minValue;
|
||||
// if we exceed the range of positive longs we must switch to fixed
|
||||
// ints
|
||||
if (delta <= (maxValue >= 0 && minValue <= 0 ? Long.MAX_VALUE
|
||||
: Long.MAX_VALUE - 1) && delta >= 0) {
|
||||
dataOut.writeByte(PACKED);
|
||||
writePackedInts(dataOut, docCount);
|
||||
return; // done
|
||||
} else {
|
||||
dataOut.writeByte(FIXED_64);
|
||||
}
|
||||
writeData(dataOut);
|
||||
writeZeros(docCount - (lastDocID + 1), dataOut);
|
||||
success = true;
|
||||
} finally {
|
||||
resetPool();
|
||||
if (success) {
|
||||
IOUtils.close(dataOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(dataOut);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
add(docID, intsRef.get());
|
||||
}
|
||||
|
||||
private void writePackedInts(IndexOutput datOut, int docCount) throws IOException {
|
||||
datOut.writeLong(minValue);
|
||||
|
||||
// write a default value to recognize docs without a value for that
|
||||
// field
|
||||
final long defaultValue = maxValue >= 0 && minValue <= 0 ? 0 - minValue
|
||||
: ++maxValue - minValue;
|
||||
datOut.writeLong(defaultValue);
|
||||
PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
|
||||
PackedInts.bitsRequired(maxValue - minValue));
|
||||
for (int i = 0; i < lastDocID + 1; i++) {
|
||||
set(bytesRef, i);
|
||||
long asLong = bytesRef.asLong();
|
||||
w.add(asLong == 0 ? defaultValue : asLong - minValue);
|
||||
}
|
||||
for (int i = lastDocID + 1; i < docCount; i++) {
|
||||
w.add(defaultValue);
|
||||
}
|
||||
w.finish();
|
||||
w.finish();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setNextEnum(ValuesEnum valuesEnum) {
|
||||
intsRef = valuesEnum.getInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, PerDocFieldValues docValues) throws IOException {
|
||||
add(docID, docValues.getInt());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens all necessary files, but does not read any data in until you call
|
||||
* {@link #load}.
|
||||
*/
|
||||
static class PackedIntsReader extends IndexDocValues {
|
||||
private final IndexInput datIn;
|
||||
private final byte type;
|
||||
private final int numDocs;
|
||||
private final LongValues values;
|
||||
|
||||
protected PackedIntsReader(Directory dir, String id, int numDocs,
|
||||
IOContext context) throws IOException {
|
||||
datIn = dir.openInput(
|
||||
IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION),
|
||||
context);
|
||||
this.numDocs = numDocs;
|
||||
boolean success = false;
|
||||
try {
|
||||
CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
|
||||
type = datIn.readByte();
|
||||
values = type == FIXED_64 ? new LongValues() : null;
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(datIn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Loads the actual values. You may call this more than once, eg if you
|
||||
* already previously loaded but then discarded the Source.
|
||||
*/
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
boolean success = false;
|
||||
final Source source;
|
||||
IndexInput input = null;
|
||||
try {
|
||||
input = (IndexInput) datIn.clone();
|
||||
|
||||
if (values == null) {
|
||||
source = new PackedIntsSource(input);
|
||||
} else {
|
||||
source = values.newFromInput(input, numDocs);
|
||||
}
|
||||
success = true;
|
||||
return source;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(input, datIn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
datIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
final IndexInput input = (IndexInput) datIn.clone();
|
||||
boolean success = false;
|
||||
try {
|
||||
final ValuesEnum inst;
|
||||
if (values == null) {
|
||||
inst = new PackedIntsEnumImpl(source, input);
|
||||
} else {
|
||||
inst = values.getDirectEnum(source, input, numDocs);
|
||||
}
|
||||
success = true;
|
||||
return inst;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(input);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.VAR_INTS;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static class PackedIntsSource extends Source {
|
||||
private final long minValue;
|
||||
private final long defaultValue;
|
||||
private final PackedInts.Reader values;
|
||||
|
||||
public PackedIntsSource(IndexInput dataIn) throws IOException {
|
||||
|
||||
minValue = dataIn.readLong();
|
||||
defaultValue = dataIn.readLong();
|
||||
values = PackedInts.getReader(dataIn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getInt(int docID) {
|
||||
// TODO -- can we somehow avoid 2X method calls
|
||||
// on each get? must push minValue down, and make
|
||||
// PackedInts implement Ints.Source
|
||||
assert docID >= 0;
|
||||
final long value = values.get(docID);
|
||||
return value == defaultValue ? 0 : minValue + value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
return new SourceEnum(attrSource, type(), this, values.size()) {
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs)
|
||||
return pos = NO_MORE_DOCS;
|
||||
intsRef.ints[intsRef.offset] = source.getInt(target);
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.VAR_INTS;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class PackedIntsEnumImpl extends ValuesEnum {
|
||||
private final PackedInts.ReaderIterator ints;
|
||||
private long minValue;
|
||||
private final IndexInput dataIn;
|
||||
private final long defaultValue;
|
||||
private final int maxDoc;
|
||||
private int pos = -1;
|
||||
|
||||
private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
|
||||
throws IOException {
|
||||
super(source, ValueType.VAR_INTS);
|
||||
intsRef.offset = 0;
|
||||
this.dataIn = dataIn;
|
||||
minValue = dataIn.readLong();
|
||||
defaultValue = dataIn.readLong();
|
||||
this.ints = PackedInts.getReaderIterator(dataIn);
|
||||
maxDoc = ints.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
ints.close();
|
||||
dataIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
final long val = ints.advance(target);
|
||||
intsRef.ints[intsRef.offset] = val == defaultValue ? 0 : minValue + val;
|
||||
return pos = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -44,9 +44,9 @@ import org.apache.lucene.util.LongsRef;
|
|||
public abstract class ValuesEnum extends DocIdSetIterator {
|
||||
private AttributeSource source;
|
||||
private final ValueType enumType;
|
||||
protected BytesRef bytesRef;
|
||||
protected FloatsRef floatsRef;
|
||||
protected LongsRef intsRef;
|
||||
protected BytesRef bytesRef = new BytesRef(1);
|
||||
protected FloatsRef floatsRef = new FloatsRef(1);
|
||||
protected LongsRef intsRef = new LongsRef(1);
|
||||
|
||||
/**
|
||||
* Creates a new {@link ValuesEnum} for the given type. The
|
||||
|
@ -62,28 +62,6 @@ public abstract class ValuesEnum extends DocIdSetIterator {
|
|||
protected ValuesEnum(AttributeSource source, ValueType enumType) {
|
||||
this.source = source;
|
||||
this.enumType = enumType;
|
||||
switch (enumType) {
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
bytesRef = new BytesRef();
|
||||
break;
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
case FIXED_INTS_64:
|
||||
case FIXED_INTS_8:
|
||||
case VAR_INTS:
|
||||
intsRef = new LongsRef(1);
|
||||
break;
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
floatsRef = new FloatsRef(1);
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -19,29 +19,17 @@ package org.apache.lucene.index.values;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.values.Bytes.BytesBaseSource;
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
|
||||
import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesEnumBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ByteBlockPool.Allocator;
|
||||
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
|
||||
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// Stores variable-length byte[] by deref, ie when two docs
|
||||
// have the same value, they store only 1 byte[] and both
|
||||
|
@ -56,51 +44,6 @@ class VarDerefBytesImpl {
|
|||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
private static final class AddressByteStartArray extends
|
||||
TrackingDirectBytesStartArray {
|
||||
int[] address;
|
||||
|
||||
AddressByteStartArray(int size, Counter bytesUsed) {
|
||||
super(size, bytesUsed);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Counter bytesUsed() {
|
||||
return bytesUsed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] clear() {
|
||||
if (address != null) {
|
||||
bytesUsed.addAndGet(-address.length * RamUsageEstimator.NUM_BYTES_INT);
|
||||
address = null;
|
||||
}
|
||||
return super.clear();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] grow() {
|
||||
assert address != null;
|
||||
final int oldSize = address.length;
|
||||
final int[] retVal = super.grow();
|
||||
address = ArrayUtil.grow(address, retVal.length);
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
|
||||
* (address.length - oldSize));
|
||||
return retVal;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] init() {
|
||||
if (address == null) {
|
||||
address = new int[ArrayUtil.oversize(initSize,
|
||||
RamUsageEstimator.NUM_BYTES_INT)];
|
||||
bytesUsed.addAndGet((address.length) * RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
return super.init();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: if impls like this are merged we are bound to the amount of memory we
|
||||
* can store into a BytesRefHash and therefore how much memory a ByteBlockPool
|
||||
|
@ -110,170 +53,66 @@ class VarDerefBytesImpl {
|
|||
* move the byte[] writing to #finish(int) and store the bytes in sorted
|
||||
* order and merge them in a streamed fashion.
|
||||
*/
|
||||
static class Writer extends BytesWriterBase {
|
||||
private int[] docToAddress;
|
||||
private int address = 1;
|
||||
|
||||
private final AddressByteStartArray array = new AddressByteStartArray(1,
|
||||
bytesUsed);
|
||||
private final BytesRefHash hash;
|
||||
|
||||
static class Writer extends DerefBytesWriterBase {
|
||||
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
|
||||
throws IOException {
|
||||
this(dir, id, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
|
||||
bytesUsed, context);
|
||||
}
|
||||
|
||||
public Writer(Directory dir, String id, Allocator allocator,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
hash = new BytesRefHash(new ByteBlockPool(allocator), 16, array);
|
||||
docToAddress = new int[1];
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void add(int docID, BytesRef bytes) throws IOException {
|
||||
if (bytes.length == 0)
|
||||
return; // default
|
||||
final int e = hash.add(bytes);
|
||||
|
||||
if (docID >= docToAddress.length) {
|
||||
final int oldSize = docToAddress.length;
|
||||
docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
|
||||
* (docToAddress.length - oldSize));
|
||||
}
|
||||
final int docAddress;
|
||||
if (e >= 0) {
|
||||
docAddress = array.address[e] = address;
|
||||
address += bytes.length < 128 ? 1 : 2;
|
||||
address += bytes.length;
|
||||
} else {
|
||||
docAddress = array.address[(-e) - 1];
|
||||
}
|
||||
docToAddress[docID] = docAddress;
|
||||
}
|
||||
|
||||
private static int writePrefixLength(DataOutput datOut, BytesRef bytes)
|
||||
throws IOException {
|
||||
if (bytes.length < 128) {
|
||||
datOut.writeByte((byte) bytes.length);
|
||||
return 1;
|
||||
} else {
|
||||
datOut.writeByte((byte) (0x80 | (bytes.length >> 8)));
|
||||
datOut.writeByte((byte) (bytes.length & 0xff));
|
||||
return 2;
|
||||
}
|
||||
protected void checkSize(BytesRef bytes) {
|
||||
// allow var bytes sizes
|
||||
}
|
||||
|
||||
// Important that we get docCount, in case there were
|
||||
// some last docs that we didn't see
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
final IndexOutput datOut = getDataOut();
|
||||
boolean success = false;
|
||||
try {
|
||||
final int size = hash.size();
|
||||
final BytesRef bytesRef = new BytesRef();
|
||||
for (int i = 0; i < size; i++) {
|
||||
hash.get(i, bytesRef);
|
||||
writePrefixLength(datOut, bytesRef);
|
||||
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
hash.close();
|
||||
if (success) {
|
||||
IOUtils.close(datOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(datOut);
|
||||
}
|
||||
}
|
||||
|
||||
final IndexOutput idxOut = getIndexOut();
|
||||
success = false;
|
||||
try {
|
||||
idxOut.writeInt(address - 1);
|
||||
// write index
|
||||
// TODO(simonw): -- allow forcing fixed array (not -1)
|
||||
// TODO(simonw): check the address calculation / make it more intuitive
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
PackedInts.bitsRequired(address - 1));
|
||||
final int limit;
|
||||
if (docCount > docToAddress.length) {
|
||||
limit = docToAddress.length;
|
||||
} else {
|
||||
limit = docCount;
|
||||
}
|
||||
for (int i = 0; i < limit; i++) {
|
||||
w.add(docToAddress[i]);
|
||||
}
|
||||
for (int i = limit; i < docCount; i++) {
|
||||
w.add(0);
|
||||
}
|
||||
w.finish();
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(idxOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(idxOut);
|
||||
}
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
|
||||
* (-docToAddress.length));
|
||||
docToAddress = null;
|
||||
public void finishInternal(int docCount) throws IOException {
|
||||
final int size = hash.size();
|
||||
final long[] addresses = new long[size+1];
|
||||
final IndexOutput datOut = getOrCreateDataOut();
|
||||
int addr = 1;
|
||||
final BytesRef bytesRef = new BytesRef();
|
||||
for (int i = 0; i < size; i++) {
|
||||
hash.get(i, bytesRef);
|
||||
addresses[i+1] = addr;
|
||||
addr += writePrefixLength(datOut, bytesRef) + bytesRef.length;
|
||||
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
|
||||
}
|
||||
|
||||
final IndexOutput idxOut = getOrCreateIndexOut();
|
||||
// write the max address to read directly on source load
|
||||
idxOut.writeLong(addr - 1);
|
||||
writeIndex(idxOut, docCount, addresses[size], addresses, docToEntry);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
|
||||
private final long totalBytes;
|
||||
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context);
|
||||
totalBytes = idxIn.readLong();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
final IndexInput data = cloneData();
|
||||
final IndexInput index = cloneIndex();
|
||||
data.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
index.seek(CodecUtil.headerLength(CODEC_NAME));
|
||||
final long totalBytes = index.readInt(); // should be long
|
||||
return new Source(data, index, totalBytes);
|
||||
return new Source(cloneData(), cloneIndex(), totalBytes);
|
||||
}
|
||||
|
||||
private static class Source extends BytesBaseSource {
|
||||
private final PackedInts.Reader index;
|
||||
private final static class Source extends DerefBytesSourceBase {
|
||||
|
||||
public Source(IndexInput datIn, IndexInput idxIn, long totalBytes)
|
||||
throws IOException {
|
||||
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), totalBytes);
|
||||
index = PackedInts.getReader(idxIn);
|
||||
super(datIn, idxIn, totalBytes, ValueType.BYTES_VAR_DEREF);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
long address = index.get(docID);
|
||||
long address = addresses.get(docID);
|
||||
bytesRef.length = 0;
|
||||
return address == 0 ? bytesRef : data.fillSliceWithPrefix(bytesRef,
|
||||
--address);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_VAR_DEREF;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int maxDoc() {
|
||||
return index.size();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -281,8 +120,8 @@ class VarDerefBytesImpl {
|
|||
return new VarDerefBytesEnum(source, cloneData(), cloneIndex());
|
||||
}
|
||||
|
||||
static class VarDerefBytesEnum extends DerefBytesEnum {
|
||||
|
||||
final static class VarDerefBytesEnum extends DerefBytesEnumBase {
|
||||
|
||||
public VarDerefBytesEnum(AttributeSource source, IndexInput datIn,
|
||||
IndexInput idxIn) throws IOException {
|
||||
super(source, datIn, idxIn, -1, ValueType.BYTES_VAR_DEREF);
|
||||
|
@ -299,8 +138,9 @@ class VarDerefBytesImpl {
|
|||
} else {
|
||||
size = ((sizeByte & 0x7f) << 8) | ((datIn.readByte() & 0xff));
|
||||
}
|
||||
if (ref.bytes.length < size)
|
||||
if (ref.bytes.length < size) {
|
||||
ref.grow(size);
|
||||
}
|
||||
ref.length = size;
|
||||
ref.offset = 0;
|
||||
datIn.readBytes(ref.bytes, 0, size);
|
||||
|
|
|
@ -18,28 +18,18 @@ package org.apache.lucene.index.values;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.index.values.Bytes.BytesBaseSortedSource;
|
||||
import org.apache.lucene.index.values.Bytes.BytesSortedSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ByteBlockPool.Allocator;
|
||||
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
|
||||
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// Stores variable-length byte[] by deref, ie when two docs
|
||||
|
@ -55,130 +45,66 @@ class VarSortedBytesImpl {
|
|||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
static class Writer extends BytesWriterBase {
|
||||
private int[] docToEntry;
|
||||
final static class Writer extends DerefBytesWriterBase {
|
||||
private final Comparator<BytesRef> comp;
|
||||
|
||||
private final BytesRefHash hash;
|
||||
|
||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
this(dir, id, comp, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
|
||||
bytesUsed, context);
|
||||
}
|
||||
|
||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||
Allocator allocator, Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
this.hash = new BytesRefHash(new ByteBlockPool(allocator),
|
||||
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
|
||||
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
|
||||
this.comp = comp;
|
||||
docToEntry = new int[1];
|
||||
docToEntry[0] = -1;
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void add(int docID, BytesRef bytes) throws IOException {
|
||||
if (bytes.length == 0)
|
||||
return;// default
|
||||
if (docID >= docToEntry.length) {
|
||||
int[] newArray = new int[ArrayUtil.oversize(1 + docID,
|
||||
RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
|
||||
System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
|
||||
Arrays.fill(newArray, docToEntry.length, newArray.length, -1);
|
||||
bytesUsed.addAndGet((newArray.length - docToEntry.length)
|
||||
* RamUsageEstimator.NUM_BYTES_INT);
|
||||
docToEntry = newArray;
|
||||
}
|
||||
final int e = hash.add(bytes);
|
||||
docToEntry[docID] = e < 0 ? (-e) - 1 : e;
|
||||
protected void checkSize(BytesRef bytes) {
|
||||
// allow var bytes sizes
|
||||
}
|
||||
|
||||
// Important that we get docCount, in case there were
|
||||
// some last docs that we didn't see
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
public void finishInternal(int docCount) throws IOException {
|
||||
final int count = hash.size();
|
||||
final IndexOutput datOut = getDataOut();
|
||||
final IndexOutput datOut = getOrCreateDataOut();
|
||||
long offset = 0;
|
||||
long lastOffset = 0;
|
||||
final int[] index = new int[count];
|
||||
final int[] index = new int[count+1];
|
||||
final long[] offsets = new long[count];
|
||||
boolean success = false;
|
||||
try {
|
||||
final int[] sortedEntries = hash.sort(comp);
|
||||
// first dump bytes data, recording index & offset as
|
||||
// we go
|
||||
for (int i = 0; i < count; i++) {
|
||||
final int e = sortedEntries[i];
|
||||
offsets[i] = offset;
|
||||
index[e] = 1 + i;
|
||||
final int[] sortedEntries = hash.sort(comp);
|
||||
// first dump bytes data, recording index & offset as
|
||||
// we go
|
||||
for (int i = 0; i < count; i++) {
|
||||
final int e = sortedEntries[i];
|
||||
offsets[i] = offset;
|
||||
index[e+1] = 1 + i;
|
||||
|
||||
final BytesRef bytes = hash.get(e, new BytesRef());
|
||||
// TODO: we could prefix code...
|
||||
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
|
||||
lastOffset = offset;
|
||||
offset += bytes.length;
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
IOUtils.close(datOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(datOut);
|
||||
}
|
||||
hash.close();
|
||||
final BytesRef bytes = hash.get(e, new BytesRef());
|
||||
// TODO: we could prefix code...
|
||||
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
|
||||
lastOffset = offset;
|
||||
offset += bytes.length;
|
||||
}
|
||||
final IndexOutput idxOut = getIndexOut();
|
||||
success = false;
|
||||
try {
|
||||
// total bytes of data
|
||||
idxOut.writeLong(offset);
|
||||
|
||||
// write index -- first doc -> 1+ord
|
||||
// TODO(simonw): allow not -1:
|
||||
final PackedInts.Writer indexWriter = PackedInts.getWriter(idxOut,
|
||||
docCount, PackedInts.bitsRequired(count));
|
||||
final int limit = docCount > docToEntry.length ? docToEntry.length
|
||||
: docCount;
|
||||
for (int i = 0; i < limit; i++) {
|
||||
final int e = docToEntry[i];
|
||||
indexWriter.add(e == -1 ? 0 : index[e]);
|
||||
}
|
||||
for (int i = limit; i < docCount; i++) {
|
||||
indexWriter.add(0);
|
||||
}
|
||||
indexWriter.finish();
|
||||
|
||||
// next ord (0-based) -> offset
|
||||
// TODO(simonw): -- allow not -1:
|
||||
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
|
||||
PackedInts.bitsRequired(lastOffset));
|
||||
for (int i = 0; i < count; i++) {
|
||||
offsetWriter.add(offsets[i]);
|
||||
}
|
||||
offsetWriter.finish();
|
||||
success = true;
|
||||
} finally {
|
||||
bytesUsed.addAndGet((-docToEntry.length)
|
||||
* RamUsageEstimator.NUM_BYTES_INT);
|
||||
docToEntry = null;
|
||||
if (success) {
|
||||
IOUtils.close(idxOut);
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(idxOut);
|
||||
}
|
||||
final IndexOutput idxOut = getOrCreateIndexOut();
|
||||
// total bytes of data
|
||||
idxOut.writeLong(offset);
|
||||
// write index -- first doc -> 1+ord
|
||||
writeIndex(idxOut, docCount, count, index, docToEntry);
|
||||
// next ord (0-based) -> offset
|
||||
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
|
||||
PackedInts.bitsRequired(lastOffset));
|
||||
for (int i = 0; i < count; i++) {
|
||||
offsetWriter.add(offsets[i]);
|
||||
}
|
||||
offsetWriter.finish();
|
||||
}
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
|
||||
private final Comparator<BytesRef> defaultComp;
|
||||
|
||||
Reader(Directory dir, String id, int maxDoc, Comparator<BytesRef> comparator, IOContext context) throws IOException {
|
||||
|
||||
Reader(Directory dir, String id, int maxDoc,
|
||||
Comparator<BytesRef> comparator, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context);
|
||||
this.defaultComp = comparator;
|
||||
}
|
||||
|
@ -196,32 +122,25 @@ class VarSortedBytesImpl {
|
|||
return new Source(cloneData(), indexIn, comp, indexIn.readLong());
|
||||
}
|
||||
|
||||
private static class Source extends BytesBaseSortedSource {
|
||||
private final PackedInts.Reader docToOrdIndex;
|
||||
private static class Source extends BytesSortedSourceBase {
|
||||
private final PackedInts.Reader ordToOffsetIndex; // 0-based
|
||||
private final long totBytes;
|
||||
private final int valueCount;
|
||||
|
||||
public Source(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comp, long dataLength) throws IOException {
|
||||
super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), dataLength);
|
||||
super(datIn, idxIn, comp, dataLength, ValueType.BYTES_VAR_SORTED);
|
||||
totBytes = dataLength;
|
||||
docToOrdIndex = PackedInts.getReader(idxIn);
|
||||
ordToOffsetIndex = PackedInts.getReader(idxIn);
|
||||
valueCount = ordToOffsetIndex.size();
|
||||
closeIndexInput();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int ord(int docID) {
|
||||
return (int) docToOrdIndex.get(docID) - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getByValue(BytesRef bytes, BytesRef tmpRef) {
|
||||
return binarySearch(bytes, tmpRef, 0, valueCount - 1);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return valueCount;
|
||||
|
@ -240,16 +159,6 @@ class VarSortedBytesImpl {
|
|||
data.fillSlice(bytesRef, offset, (int) (nextOffset - offset));
|
||||
return bytesRef;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_VAR_SORTED;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int maxDoc() {
|
||||
return docToOrdIndex.size();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -19,9 +19,9 @@ package org.apache.lucene.index.values;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.values.Bytes.BytesBaseSource;
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
@ -32,7 +32,6 @@ import org.apache.lucene.util.ByteBlockPool;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
@ -95,7 +94,7 @@ class VarStraightBytesImpl {
|
|||
@Override
|
||||
protected void merge(MergeState state) throws IOException {
|
||||
merge = true;
|
||||
datOut = getDataOut();
|
||||
datOut = getOrCreateDataOut();
|
||||
boolean success = false;
|
||||
try {
|
||||
if (state.liveDocs == null && state.reader instanceof Reader) {
|
||||
|
@ -166,7 +165,7 @@ class VarStraightBytesImpl {
|
|||
public void finish(int docCount) throws IOException {
|
||||
boolean success = false;
|
||||
assert (!merge && datOut == null) || (merge && datOut != null);
|
||||
final IndexOutput datOut = getDataOut();
|
||||
final IndexOutput datOut = getOrCreateDataOut();
|
||||
try {
|
||||
if (!merge) {
|
||||
// header is already written in getDataOut()
|
||||
|
@ -183,7 +182,7 @@ class VarStraightBytesImpl {
|
|||
}
|
||||
|
||||
success = false;
|
||||
final IndexOutput idxOut = getIndexOut();
|
||||
final IndexOutput idxOut = getOrCreateIndexOut();
|
||||
try {
|
||||
if (lastDocID == -1) {
|
||||
idxOut.writeVLong(0);
|
||||
|
@ -234,12 +233,10 @@ class VarStraightBytesImpl {
|
|||
return new Source(cloneData(), cloneIndex());
|
||||
}
|
||||
|
||||
private class Source extends BytesBaseSource {
|
||||
private final PackedInts.Reader addresses;
|
||||
private class Source extends DerefBytesSourceBase {
|
||||
|
||||
public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
|
||||
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVLong());
|
||||
addresses = PackedInts.getReader(idxIn);
|
||||
super(datIn, idxIn, idxIn.readVLong(), ValueType.BYTES_VAR_STRAIGHT);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -263,21 +260,6 @@ class VarStraightBytesImpl {
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_VAR_STRAIGHT;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int maxDoc() {
|
||||
return addresses.size();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -243,6 +243,18 @@ public final class ByteBlockPool {
|
|||
assert term.length >= 0;
|
||||
return term;
|
||||
}
|
||||
/**
|
||||
* Dereferences the byte block according to {@link BytesRef} offset. The offset
|
||||
* is interpreted as the absolute offset into the {@link ByteBlockPool}.
|
||||
*/
|
||||
public final BytesRef deref(BytesRef bytes) {
|
||||
final int offset = bytes.offset;
|
||||
byte[] buffer = buffers[offset >> BYTE_BLOCK_SHIFT];
|
||||
int pos = offset & BYTE_BLOCK_MASK;
|
||||
bytes.bytes = buffer;
|
||||
bytes.offset = pos;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the given {@link BytesRef} at the current positions (
|
||||
|
|
|
@ -238,6 +238,12 @@ public final class BytesRef implements Comparable<BytesRef> {
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the given {@link BytesRef}
|
||||
* <p>
|
||||
* NOTE: this method resets the offset to 0 and resizes the reference array
|
||||
* if needed.
|
||||
*/
|
||||
public void copy(BytesRef other) {
|
||||
if (bytes.length < other.length) {
|
||||
bytes = new byte[other.length];
|
||||
|
@ -247,6 +253,93 @@ public final class BytesRef implements Comparable<BytesRef> {
|
|||
offset = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the given long value and encodes it as 8 byte Big-Endian.
|
||||
* <p>
|
||||
* NOTE: this method resets the offset to 0, length to 8 and resizes the reference array
|
||||
* if needed.
|
||||
*/
|
||||
public void copy(long value) {
|
||||
if (bytes.length < 8) {
|
||||
bytes = new byte[8];
|
||||
}
|
||||
copyInternal((int) (value >> 32), offset = 0);
|
||||
copyInternal((int) value, 4);
|
||||
length = 8;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the given int value and encodes it as 4 byte Big-Endian.
|
||||
* <p>
|
||||
* NOTE: this method resets the offset to 0, length to 4 and resizes the reference array
|
||||
* if needed.
|
||||
*/
|
||||
public void copy(int value) {
|
||||
if (bytes.length < 4) {
|
||||
bytes = new byte[4];
|
||||
}
|
||||
copyInternal(value, offset = 0);
|
||||
length = 4;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the given short value and encodes it as a 2 byte Big-Endian.
|
||||
* <p>
|
||||
* NOTE: this method resets the offset to 0, length to 2 and resizes the reference array
|
||||
* if needed.
|
||||
*/
|
||||
public void copy(short value) {
|
||||
if (bytes.length < 2) {
|
||||
bytes = new byte[2];
|
||||
}
|
||||
bytes[offset] = (byte) (value >> 8);
|
||||
bytes[offset + 1] = (byte) (value);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts 2 consecutive bytes from the current offset to a short. Bytes are
|
||||
* interpreted as Big-Endian (most significant bit first)
|
||||
* <p>
|
||||
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
|
||||
*/
|
||||
public short asShort() {
|
||||
int pos = offset;
|
||||
return (short) (0xFFFF & ((bytes[pos++] & 0xFF) << 8) | (bytes[pos] & 0xFF));
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts 4 consecutive bytes from the current offset to an int. Bytes are
|
||||
* interpreted as Big-Endian (most significant bit first)
|
||||
* <p>
|
||||
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
|
||||
*/
|
||||
public int asInt() {
|
||||
return asIntInternal(offset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts 8 consecutive bytes from the current offset to a long. Bytes are
|
||||
* interpreted as Big-Endian (most significant bit first)
|
||||
* <p>
|
||||
* NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
|
||||
*/
|
||||
public long asLong() {
|
||||
return (((long) asIntInternal(offset) << 32) | asIntInternal(offset + 4) & 0xFFFFFFFFL);
|
||||
}
|
||||
|
||||
private void copyInternal(int value, int startOffset) {
|
||||
bytes[startOffset] = (byte) (value >> 24);
|
||||
bytes[startOffset + 1] = (byte) (value >> 16);
|
||||
bytes[startOffset + 2] = (byte) (value >> 8);
|
||||
bytes[startOffset + 3] = (byte) (value);
|
||||
}
|
||||
|
||||
private int asIntInternal(int pos) {
|
||||
return ((bytes[pos++] & 0xFF) << 24) | ((bytes[pos++] & 0xFF) << 16)
|
||||
| ((bytes[pos++] & 0xFF) << 8) | (bytes[pos] & 0xFF);
|
||||
}
|
||||
|
||||
public void append(BytesRef other) {
|
||||
int newLen = length + other.length;
|
||||
if (bytes.length < newLen) {
|
||||
|
@ -284,7 +377,7 @@ public final class BytesRef implements Comparable<BytesRef> {
|
|||
// One is a prefix of the other, or, they are equal:
|
||||
return this.length - other.length;
|
||||
}
|
||||
|
||||
|
||||
private final static Comparator<BytesRef> utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();
|
||||
|
||||
public static Comparator<BytesRef> getUTF8SortedAsUnicodeComparator() {
|
||||
|
|
|
@ -188,7 +188,7 @@ public class TestDocValues extends LuceneTestCase {
|
|||
w.add(1, minMax[i][1]);
|
||||
w.finish(2);
|
||||
assertEquals(0, trackBytes.get());
|
||||
IndexDocValues r = Ints.getValues(dir, "test", 2, newIOContext(random));
|
||||
IndexDocValues r = Ints.getValues(dir, "test", 2, ValueType.VAR_INTS, newIOContext(random));
|
||||
Source source = getSource(r);
|
||||
assertEquals(i + " with min: " + minMax[i][0] + " max: " + minMax[i][1],
|
||||
expectedTypes[i], source.type());
|
||||
|
@ -229,7 +229,7 @@ public class TestDocValues extends LuceneTestCase {
|
|||
w.add(i, (long) sourceArray[i]);
|
||||
}
|
||||
w.finish(sourceArray.length);
|
||||
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, newIOContext(random));
|
||||
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, ValueType.FIXED_INTS_8, newIOContext(random));
|
||||
Source source = r.getSource();
|
||||
assertTrue(source.hasArray());
|
||||
byte[] loaded = ((byte[])source.getArray());
|
||||
|
@ -250,7 +250,7 @@ public class TestDocValues extends LuceneTestCase {
|
|||
w.add(i, (long) sourceArray[i]);
|
||||
}
|
||||
w.finish(sourceArray.length);
|
||||
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, newIOContext(random));
|
||||
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, ValueType.FIXED_INTS_16, newIOContext(random));
|
||||
Source source = r.getSource();
|
||||
assertTrue(source.hasArray());
|
||||
short[] loaded = ((short[])source.getArray());
|
||||
|
@ -271,7 +271,7 @@ public class TestDocValues extends LuceneTestCase {
|
|||
w.add(i, sourceArray[i]);
|
||||
}
|
||||
w.finish(sourceArray.length);
|
||||
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, newIOContext(random));
|
||||
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, ValueType.FIXED_INTS_64, newIOContext(random));
|
||||
Source source = r.getSource();
|
||||
assertTrue(source.hasArray());
|
||||
long[] loaded = ((long[])source.getArray());
|
||||
|
@ -292,7 +292,7 @@ public class TestDocValues extends LuceneTestCase {
|
|||
w.add(i, (long) sourceArray[i]);
|
||||
}
|
||||
w.finish(sourceArray.length);
|
||||
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, newIOContext(random));
|
||||
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, ValueType.FIXED_INTS_32, newIOContext(random));
|
||||
Source source = r.getSource();
|
||||
assertTrue(source.hasArray());
|
||||
int[] loaded = ((int[])source.getArray());
|
||||
|
@ -363,7 +363,7 @@ public class TestDocValues extends LuceneTestCase {
|
|||
w.finish(NUM_VALUES + additionalDocs);
|
||||
assertEquals(0, trackBytes.get());
|
||||
|
||||
IndexDocValues r = Ints.getValues(dir, "test", NUM_VALUES + additionalDocs, newIOContext(random));
|
||||
IndexDocValues r = Ints.getValues(dir, "test", NUM_VALUES + additionalDocs, type, newIOContext(random));
|
||||
for (int iter = 0; iter < 2; iter++) {
|
||||
Source s = getSource(r);
|
||||
assertEquals(type, s.type());
|
||||
|
|
Loading…
Reference in New Issue