LUCENE-3231: Add fixed size DocValues int variants & expose Arrays where possible

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1140047 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2011-06-27 08:21:22 +00:00
parent 28c15b9637
commit 22f37cf1f1
18 changed files with 1178 additions and 337 deletions

View File

@ -98,12 +98,99 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
/**
* Sets the given <code>long</code> value and sets the field's {@link ValueType} to
* {@link ValueType#INTS} unless already set. If you want to change the
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
*/
public void setInt(long value) {
setInt(value, false);
}
/**
* Sets the given <code>long</code> value as a 64 bit signed integer.
*
* @param value
* the value to set
* @param fixed
* if <code>true</code> {@link ValueType#FIXED_INTS_64} is used
* otherwise {@link ValueType#VAR_INTS}
*/
public void setInt(long value, boolean fixed) {
if (type == null) {
type = ValueType.INTS;
type = fixed ? ValueType.FIXED_INTS_64 : ValueType.VAR_INTS;
}
longValue = value;
}
/**
* Sets the given <code>int</code> value and sets the field's {@link ValueType} to
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
*/
public void setInt(int value) {
setInt(value, false);
}
/**
 * Stores the given <code>int</code> as this field's value. If no
 * {@link ValueType} has been assigned yet, it is initialized to
 * {@link ValueType#FIXED_INTS_32} when <code>fixed</code> is <code>true</code>
 * and to {@link ValueType#VAR_INTS} otherwise; an already assigned type is
 * left untouched.
 *
 * @param value
 *          the value to set
 * @param fixed
 *          selects the fixed 32 bit type over the variable bit type while the
 *          type is still unset
 */
public void setInt(int value, boolean fixed) {
  if (type == null) {
    if (fixed) {
      type = ValueType.FIXED_INTS_32;
    } else {
      type = ValueType.VAR_INTS;
    }
  }
  longValue = value;
}
/**
* Sets the given <code>short</code> value and sets the field's {@link ValueType} to
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
*/
public void setInt(short value) {
setInt(value, false);
}
/**
 * Stores the given <code>short</code> as this field's value. If no
 * {@link ValueType} has been assigned yet, it is initialized to
 * {@link ValueType#FIXED_INTS_16} when <code>fixed</code> is <code>true</code>
 * and to {@link ValueType#VAR_INTS} otherwise; an already assigned type is
 * left untouched.
 *
 * @param value
 *          the value to set
 * @param fixed
 *          selects the fixed 16 bit type over the variable bit type while the
 *          type is still unset
 */
public void setInt(short value, boolean fixed) {
  if (type == null) {
    if (fixed) {
      type = ValueType.FIXED_INTS_16;
    } else {
      type = ValueType.VAR_INTS;
    }
  }
  longValue = value;
}
/**
* Sets the given <code>byte</code> value and sets the field's {@link ValueType} to
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
*/
public void setInt(byte value) {
setInt(value, false);
}
/**
 * Stores the given <code>byte</code> as this field's value, an 8 bit signed
 * integer. If no {@link ValueType} has been assigned yet, it is initialized to
 * {@link ValueType#FIXED_INTS_8} when <code>fixed</code> is <code>true</code>
 * and to {@link ValueType#VAR_INTS} otherwise; an already assigned type is
 * left untouched.
 *
 * @param value
 *          the value to set
 * @param fixed
 *          selects the fixed 8 bit type over the variable bit type while the
 *          type is still unset
 */
public void setInt(byte value, boolean fixed) {
  if (type == null) {
    if (fixed) {
      type = ValueType.FIXED_INTS_8;
    } else {
      type = ValueType.VAR_INTS;
    }
  }
  longValue = value;
}
@ -268,7 +355,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
field.stringValue());
valField.setBytes(ref, type);
break;
case INTS:
case VAR_INTS:
valField.setInt(Long.parseLong(field.stringValue()));
break;
case FLOAT_32:

View File

@ -1025,7 +1025,11 @@ public class CheckIndex {
case FLOAT_64:
values.getFloat();
break;
case INTS:
case VAR_INTS:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
values.getInt();
break;
default:

View File

@ -609,7 +609,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
b = 0;
} else {
switch(fi.docValues) {
case INTS:
case VAR_INTS:
b = 1;
break;
case FLOAT_32:
@ -636,6 +636,19 @@ public final class FieldInfos implements Iterable<FieldInfo> {
case BYTES_VAR_SORTED:
b = 9;
break;
case FIXED_INTS_16:
b = 10;
break;
case FIXED_INTS_32:
b = 11;
break;
case FIXED_INTS_64:
b = 12;
break;
case FIXED_INTS_8:
b = 13;
break;
default:
throw new IllegalStateException("unhandled indexValues type " + fi.docValues);
}
@ -686,7 +699,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
docValuesType = null;
break;
case 1:
docValuesType = ValueType.INTS;
docValuesType = ValueType.VAR_INTS;
break;
case 2:
docValuesType = ValueType.FLOAT_32;
@ -712,6 +725,19 @@ public final class FieldInfos implements Iterable<FieldInfo> {
case 9:
docValuesType = ValueType.BYTES_VAR_SORTED;
break;
case 10:
docValuesType = ValueType.FIXED_INTS_16;
break;
case 11:
docValuesType = ValueType.FIXED_INTS_32;
break;
case 12:
docValuesType = ValueType.FIXED_INTS_64;
break;
case 13:
docValuesType = ValueType.FIXED_INTS_8;
break;
default:
throw new IllegalStateException("unhandled indexValues type " + b);
}

View File

@ -80,12 +80,17 @@ public class DefaultDocValuesConsumer extends PerDocConsumer {
case BYTES_FIXED_STRAIGHT:
case FLOAT_32:
case FLOAT_64:
case INTS:
case VAR_INTS:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
files.add(IndexFileNames.segmentFileName(filename, "",
Writer.DATA_EXTENSION));
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
Writer.DATA_EXTENSION));
break;
default:
assert false;
}

View File

@ -121,8 +121,12 @@ public class DefaultDocValuesProducer extends PerDocValues {
protected IndexDocValues loadDocValues(int docCount, Directory dir, String id,
ValueType type) throws IOException {
switch (type) {
case INTS:
return Ints.getValues(dir, id, false);
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getValues(dir, id);
case FLOAT_32:
return Floats.getValues(dir, id, docCount);
case FLOAT_64:

View File

@ -156,7 +156,7 @@ public class Floats {
}
// Writes 4 bytes (float) per value
static class Float4Writer extends FloatsWriter {
static final class Float4Writer extends FloatsWriter {
private int[] values;
protected Float4Writer(Directory dir, String id, AtomicLong bytesUsed)
throws IOException {
@ -219,7 +219,7 @@ public class Floats {
}
// Writes 8 bytes (double) per value
static class Float8Writer extends FloatsWriter {
static final class Float8Writer extends FloatsWriter {
private long[] values;
protected Float8Writer(Directory dir, String id, AtomicLong bytesUsed)
throws IOException {
@ -341,7 +341,7 @@ public class Floats {
}
}
private class Source4 extends Source {
private final class Source4 extends Source {
private final float[] values;
Source4(final float[] values ) throws IOException {
@ -367,13 +367,23 @@ public class Floats {
};
}
@Override
public Object getArray() {
return this.values;
}
@Override
public boolean hasArray() {
return true;
}
@Override
public ValueType type() {
return ValueType.FLOAT_32;
}
}
private class Source8 extends Source {
private final class Source8 extends Source {
private final double[] values;
Source8(final double[] values) throws IOException {
@ -403,6 +413,16 @@ public class Floats {
public ValueType type() {
return ValueType.FLOAT_64;
}
@Override
public Object getArray() {
return this.values;
}
@Override
public boolean hasArray() {
return true;
}
}
@Override

View File

@ -254,6 +254,25 @@ public abstract class IndexDocValues implements Closeable {
*/
public abstract ValuesEnum getEnum(AttributeSource attrSource)
throws IOException;
/**
* Returns <code>true</code> iff this {@link Source} exposes an array via
* {@link #getArray()} otherwise <code>false</code>.
*
* @return <code>true</code> iff this {@link Source} exposes an array via
* {@link #getArray()} otherwise <code>false</code>.
*/
public boolean hasArray() {
return false;
}
/**
* Returns the internal array representation iff this {@link Source} uses an
* array as its inner representation, otherwise <code>null</code>.
*/
public Object getArray() {
return null;
}
}
/**

View File

@ -0,0 +1,470 @@
package org.apache.lucene.index.values;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/**
* @lucene.experimental
*/
abstract class IndexDocValuesArray extends Source {
private final AtomicLong bytesUsed;
private final int bytesPerValue;
private int size = 0;
private final ValueType type;
protected int maxDocID = -1;
/**
 * Creates an array backed source.
 *
 * @param bytesUsed
 *          counter that {@link #adjustSize(int)} updates whenever the number
 *          of slots changes
 * @param bytesPerValue
 *          number of bytes accounted for each slot
 * @param type
 *          the {@link ValueType} returned by {@link #type()}
 */
IndexDocValuesArray(AtomicLong bytesUsed, int bytesPerValue, ValueType type) {
this.bytesUsed = bytesUsed;
this.bytesPerValue = bytesPerValue;
this.type = type;
}
/**
 * Stores <code>value</code> for <code>docId</code>. The backing array is
 * grown first when the document id does not fit, and the highest document id
 * seen so far is tracked.
 */
void set(int docId, long value) {
  final boolean needsGrowth = docId >= size;
  if (needsGrowth) {
    final int grownLength = grow(docId + 1);
    adjustSize(grownLength);
  }
  maxDocID = Math.max(maxDocID, docId);
  setInternal(docId, value);
}
/**
 * Records the new number of slots and adds the corresponding byte delta
 * (negative when shrinking) to the <code>bytesUsed</code> counter.
 *
 * @param newSize
 *          the number of slots now held by the backing array
 */
protected final void adjustSize(int newSize) {
  // Widen before multiplying: with 8 bytes per value the int product would
  // overflow once the slot delta exceeds roughly 268 million.
  final long deltaBytes = (long) bytesPerValue * (newSize - size);
  bytesUsed.addAndGet(deltaBytes);
  size = newSize;
}
/**
 * Resets this instance to the empty state: the accounted bytes are released
 * and the highest document id seen is forgotten.
 */
void clear() {
  adjustSize(0); // leaves size at 0
  maxDocID = -1;
}
protected abstract void setInternal(int docId, long value);
protected abstract int grow(int numDocs);
abstract void write(IndexOutput output, int numDocs) throws IOException;
@Override
public final int getValueCount() {
return maxDocID + 1;
}
@Override
public final ValueType type() {
return type;
}
/**
 * Returns an enum over the values held in memory, covering document ids
 * <code>0</code> through the highest document id set.
 */
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
  final int valueCount = maxDocID + 1;
  return new SourceEnum(attrSource, type(), this, valueCount) {
    @Override
    public int advance(int target) throws IOException {
      if (target < numDocs) {
        intsRef.ints[intsRef.offset] = IndexDocValuesArray.this.getInt(target);
        pos = target;
      } else {
        pos = NO_MORE_DOCS;
      }
      return pos;
    }
  };
}
abstract ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input)
throws IOException;
@Override
public final boolean hasArray() {
return true;
}
/**
 * {@link IndexDocValuesArray} that keeps its values in a <code>byte[]</code>
 * and reports {@link ValueType#FIXED_INTS_8}.
 */
static final class ByteValues extends IndexDocValuesArray {
  private byte[] values;

  /** Creates an empty instance that accounts its slots on <code>bytesUsed</code>. */
  ByteValues(AtomicLong bytesUsed) {
    super(bytesUsed, 1, ValueType.FIXED_INTS_8);
    values = new byte[0];
  }

  /** Reads an int count followed by one byte per document from <code>input</code>. */
  ByteValues(IndexInput input) throws IOException {
    super(new AtomicLong(), 1, ValueType.FIXED_INTS_8);
    final int count = input.readInt();
    values = new byte[count];
    adjustSize(count);
    input.readBytes(values, 0, count, false);
    maxDocID = count - 1;
  }

  @Override
  public byte[] getArray() {
    return values;
  }

  @Override
  public long getInt(int docID) {
    assert docID >= 0 && docID < values.length;
    return values[docID];
  }

  @Override
  protected void setInternal(int docId, long value) {
    // the narrowing cast keeps the low 8 bits
    values[docId] = (byte) value;
  }

  @Override
  protected int grow(int numDocs) {
    values = ArrayUtil.grow(values, numDocs);
    return values.length;
  }

  /** Writes the document count, the stored values and zero padding up to <code>numDocs</code>. */
  @Override
  void write(IndexOutput output, int numDocs) throws IOException {
    final int stored = maxDocID + 1;
    assert stored <= numDocs;
    output.writeInt(numDocs);
    output.writeBytes(values, 0, stored);
    // documents past the last stored one get a zero
    for (int doc = stored; doc < numDocs; doc++) {
      output.writeByte((byte) 0);
    }
  }

  /** Returns an enum that reads one byte per document directly from <code>input</code>. */
  @Override
  ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input)
      throws IOException {
    return new FixedIntsEnumImpl(attrSource, input, type()) {
      @Override
      protected void fillNext(LongsRef target, IndexInput in) throws IOException {
        target.ints[target.offset] = in.readByte();
      }
    };
  }

  @Override
  void clear() {
    super.clear();
    values = new byte[0];
  }
}
/**
 * {@link IndexDocValuesArray} that keeps its values in a <code>short[]</code>
 * and reports {@link ValueType#FIXED_INTS_16}.
 */
static final class ShortValues extends IndexDocValuesArray {
  private short[] values;

  /** Creates an empty instance that accounts its slots on <code>bytesUsed</code>. */
  ShortValues(AtomicLong bytesUsed) {
    super(bytesUsed, RamUsageEstimator.NUM_BYTES_SHORT, ValueType.FIXED_INTS_16);
    values = new short[0];
  }

  /** Reads an int count followed by one short per document from <code>input</code>. */
  ShortValues(IndexInput input) throws IOException {
    super(new AtomicLong(), RamUsageEstimator.NUM_BYTES_SHORT, ValueType.FIXED_INTS_16);
    final int count = input.readInt();
    values = new short[count];
    adjustSize(count);
    for (int doc = 0; doc < count; doc++) {
      values[doc] = input.readShort();
    }
    maxDocID = count - 1;
  }

  @Override
  public short[] getArray() {
    return values;
  }

  @Override
  public long getInt(int docID) {
    assert docID >= 0 && docID < values.length;
    return values[docID];
  }

  @Override
  protected void setInternal(int docId, long value) {
    // the narrowing cast keeps the low 16 bits
    values[docId] = (short) value;
  }

  @Override
  protected int grow(int numDocs) {
    values = ArrayUtil.grow(values, numDocs);
    return values.length;
  }

  /** Writes the document count, the stored values and zero padding up to <code>numDocs</code>. */
  @Override
  void write(IndexOutput output, int numDocs) throws IOException {
    assert maxDocID + 1 <= numDocs;
    output.writeInt(numDocs);
    int doc = 0;
    for (; doc <= maxDocID; doc++) {
      output.writeShort(values[doc]);
    }
    // documents past the last stored one get a zero
    for (; doc < numDocs; doc++) {
      output.writeShort((short) 0);
    }
  }

  /** Returns an enum that reads one short per document directly from <code>input</code>. */
  @Override
  ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input)
      throws IOException {
    return new FixedIntsEnumImpl(attrSource, input, type()) {
      @Override
      protected void fillNext(LongsRef target, IndexInput in) throws IOException {
        target.ints[target.offset] = in.readShort();
      }
    };
  }

  @Override
  void clear() {
    super.clear();
    values = new short[0];
  }
}
/**
 * {@link IndexDocValuesArray} that keeps its values in an <code>int[]</code>
 * and reports {@link ValueType#FIXED_INTS_32}.
 */
static final class IntValues extends IndexDocValuesArray {
  private int[] values;

  /** Creates an empty instance that accounts its slots on <code>bytesUsed</code>. */
  IntValues(AtomicLong bytesUsed) {
    super(bytesUsed, RamUsageEstimator.NUM_BYTES_INT, ValueType.FIXED_INTS_32);
    values = new int[0];
  }

  /** Reads an int count followed by one int per document from <code>input</code>. */
  IntValues(IndexInput input) throws IOException {
    super(new AtomicLong(), RamUsageEstimator.NUM_BYTES_INT, ValueType.FIXED_INTS_32);
    final int count = input.readInt();
    values = new int[count];
    adjustSize(count);
    for (int doc = 0; doc < count; doc++) {
      values[doc] = input.readInt();
    }
    maxDocID = count - 1;
  }

  @Override
  public int[] getArray() {
    return values;
  }

  @Override
  public long getInt(int docID) {
    assert docID >= 0 && docID < values.length;
    // the int is widened to long with sign extension
    return values[docID];
  }

  @Override
  protected void setInternal(int docId, long value) {
    // the narrowing cast keeps the low 32 bits
    values[docId] = (int) value;
  }

  @Override
  protected int grow(int numDocs) {
    values = ArrayUtil.grow(values, numDocs);
    return values.length;
  }

  /** Writes the document count, the stored values and zero padding up to <code>numDocs</code>. */
  @Override
  void write(IndexOutput output, int numDocs) throws IOException {
    assert maxDocID + 1 <= numDocs;
    output.writeInt(numDocs);
    int doc = 0;
    for (; doc <= maxDocID; doc++) {
      output.writeInt(values[doc]);
    }
    // documents past the last stored one get a zero
    for (; doc < numDocs; doc++) {
      output.writeInt(0);
    }
  }

  /** Returns an enum that reads one int per document directly from <code>input</code>. */
  @Override
  ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input)
      throws IOException {
    return new FixedIntsEnumImpl(attrSource, input, type()) {
      @Override
      protected void fillNext(LongsRef target, IndexInput in) throws IOException {
        target.ints[target.offset] = in.readInt();
      }
    };
  }

  @Override
  void clear() {
    super.clear();
    values = new int[0];
  }
}
/**
 * {@link IndexDocValuesArray} that keeps its values in a <code>long[]</code>
 * and reports {@link ValueType#FIXED_INTS_64}.
 */
static final class LongValues extends IndexDocValuesArray {
  private long[] values;

  /** Creates an empty instance that accounts its slots on <code>bytesUsed</code>. */
  LongValues(AtomicLong bytesUsed) {
    super(bytesUsed, RamUsageEstimator.NUM_BYTES_LONG, ValueType.FIXED_INTS_64);
    values = new long[0];
  }

  /** Reads an int count followed by one long per document from <code>input</code>. */
  LongValues(IndexInput input) throws IOException {
    super(new AtomicLong(), RamUsageEstimator.NUM_BYTES_LONG, ValueType.FIXED_INTS_64);
    final int count = input.readInt();
    values = new long[count];
    adjustSize(count);
    for (int doc = 0; doc < count; doc++) {
      values[doc] = input.readLong();
    }
    maxDocID = count - 1;
  }

  @Override
  public long[] getArray() {
    return values;
  }

  @Override
  public long getInt(int docID) {
    assert docID >= 0 && docID < values.length;
    return values[docID];
  }

  @Override
  protected void setInternal(int docId, long value) {
    values[docId] = value;
  }

  @Override
  protected int grow(int numDocs) {
    values = ArrayUtil.grow(values, numDocs);
    return values.length;
  }

  /** Writes the document count, the stored values and zero padding up to <code>numDocs</code>. */
  @Override
  void write(IndexOutput output, int numDocs) throws IOException {
    assert maxDocID + 1 <= numDocs;
    output.writeInt(numDocs);
    int doc = 0;
    for (; doc <= maxDocID; doc++) {
      output.writeLong(values[doc]);
    }
    // documents past the last stored one get a zero
    for (; doc < numDocs; doc++) {
      output.writeLong(0L);
    }
  }

  /** Returns an enum that reads one long per document directly from <code>input</code>. */
  @Override
  ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input)
      throws IOException {
    return new FixedIntsEnumImpl(attrSource, input, type()) {
      @Override
      protected void fillNext(LongsRef target, IndexInput in) throws IOException {
        target.ints[target.offset] = in.readLong();
      }
    };
  }

  @Override
  void clear() {
    super.clear();
    values = new long[0];
  }
}
/**
 * Base class for enums that read fixed width integers directly from an
 * {@link IndexInput}. The constructor reads an int document count from the
 * input; subclasses decode a single value in
 * {@link #fillNext(LongsRef, IndexInput)}.
 */
private abstract static class FixedIntsEnumImpl extends ValuesEnum {
  private final IndexInput dataIn;
  private final int maxDoc;
  private final int sizeInByte;
  private int pos = -1;

  /**
   * @param source
   *          the attribute source passed on to {@link ValuesEnum}
   * @param dataIn
   *          the input to read from, positioned at the int document count
   * @param type
   *          one of the fixed int types; determines the width of a value
   * @throws IllegalStateException
   *           if <code>type</code> is not a fixed int type
   */
  private FixedIntsEnumImpl(AttributeSource source, IndexInput dataIn,
      ValueType type) throws IOException {
    super(source, type);
    switch (type) {
      case FIXED_INTS_16:
        sizeInByte = 2;
        break;
      case FIXED_INTS_32:
        sizeInByte = 4;
        break;
      case FIXED_INTS_64:
        sizeInByte = 8;
        break;
      case FIXED_INTS_8:
        sizeInByte = 1;
        break;
      default:
        throw new IllegalStateException("type " + type
            + " is not a fixed int type");
    }
    intsRef.offset = 0;
    this.dataIn = dataIn;
    maxDoc = dataIn.readInt();
  }

  @Override
  public void close() throws IOException {
    dataIn.close();
  }

  /**
   * Moves to <code>target</code>, seeking over the values of skipped
   * documents, and decodes the value at that position.
   */
  @Override
  public int advance(int target) throws IOException {
    if (target >= maxDoc) {
      return pos = NO_MORE_DOCS;
    }
    assert target > pos;
    if (target > pos + 1) {
      // Compute the skip distance in long arithmetic: the number of skipped
      // documents times the value width can exceed the int range.
      final long skipBytes = ((long) target - pos - 1) * sizeInByte;
      dataIn.seek(dataIn.getFilePointer() + skipBytes);
    }
    fillNext(intsRef, dataIn);
    return pos = target;
  }

  /**
   * Reads the next value from <code>input</code> and stores it in
   * <code>ref.ints[ref.offset]</code>.
   */
  protected abstract void fillNext(LongsRef ref, IndexInput input)
      throws IOException;

  @Override
  public int docID() {
    return pos;
  }

  @Override
  public int nextDoc() throws IOException {
    if (pos >= maxDoc) {
      return pos = NO_MORE_DOCS;
    }
    return advance(pos + 1);
  }
}
}

View File

@ -33,14 +33,11 @@ public class Ints {
private Ints() {
}
public static Writer getWriter(Directory dir, String id,
boolean useFixedArray, AtomicLong bytesUsed) throws IOException {
// TODO - implement fixed?!
return new IntsWriter(dir, id, bytesUsed);
public static Writer getWriter(Directory dir, String id, AtomicLong bytesUsed, ValueType type) throws IOException {
return new IntsWriter(dir, id, bytesUsed, type);
}
public static IndexDocValues getValues(Directory dir, String id,
boolean useFixedArray) throws IOException {
public static IndexDocValues getValues(Directory dir, String id) throws IOException {
return new IntsReader(dir, id);
}
}

View File

@ -21,19 +21,24 @@ import java.util.Collection;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.IndexDocValuesArray;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.index.values.IndexDocValuesArray.ByteValues;
import org.apache.lucene.index.values.IndexDocValuesArray.IntValues;
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
import org.apache.lucene.index.values.IndexDocValuesArray.ShortValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
/**
* Stores ints packed with fixed-bit precision.
* Stores ints packed and fixed with fixed-bit precision.
*
* @lucene.experimental
* */
@ -41,42 +46,56 @@ class IntsImpl {
private static final String CODEC_NAME = "Ints";
private static final byte PACKED = 0x00;
private static final byte FIXED = 0x01;
private static final byte FIXED_64 = 0x01;
private static final byte FIXED_32 = 0x02;
private static final byte FIXED_16 = 0x03;
private static final byte FIXED_8 = 0x04;
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static class IntsWriter extends Writer {
// TODO: can we bulkcopy this on a merge?
// TODO: optimize merging here!!
private LongsRef intsRef;
private long[] docToValue;
private final IndexDocValuesArray array;
private long minValue;
private long maxValue;
private boolean started;
private final String id;
private int lastDocId = -1;
private IndexOutput datOut;
private final Directory dir;
private final byte typeOrd;
protected IntsWriter(Directory dir, String id, AtomicLong bytesUsed)
throws IOException {
protected IntsWriter(Directory dir, String id, AtomicLong bytesUsed,
ValueType valueType) throws IOException {
super(bytesUsed);
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
DATA_EXTENSION));
boolean success = false;
try {
CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
this.id = id;
docToValue = new long[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG); // TODO the
// bitset
// needs memory
// too
success = true;
} finally {
if (!success) {
datOut.close();
}
this.dir = dir;
this.id = id;
switch (valueType) {
case FIXED_INTS_16:
array= new ShortValues(bytesUsed);
typeOrd = FIXED_16;
break;
case FIXED_INTS_32:
array = new IntValues(bytesUsed);
typeOrd = FIXED_32;
break;
case FIXED_INTS_64:
array = new LongValues(bytesUsed);
typeOrd = FIXED_64;
break;
case FIXED_INTS_8:
array = new ByteValues(bytesUsed);
typeOrd = FIXED_8;
break;
case VAR_INTS:
array = new LongValues(bytesUsed);
typeOrd = PACKED;
break;
default:
throw new IllegalStateException("unknown type " + valueType);
}
}
@ -94,58 +113,53 @@ class IntsImpl {
}
}
lastDocId = docID;
if (docID >= docToValue.length) {
final long len = docToValue.length;
docToValue = ArrayUtil.grow(docToValue, 1 + docID);
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG
* ((docToValue.length) - len));
}
docToValue[docID] = v;
array.set(docID, v);
}
@Override
public void finish(int docCount) throws IOException {
IndexOutput datOut = null;
boolean success = false;
try {
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
DATA_EXTENSION));
CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
if (!started) {
minValue = maxValue = 0;
}
// if we exceed the range of positive longs we must switch to fixed ints
if ((maxValue - minValue) < (((long)1) << 63) && (maxValue - minValue) >= 0) {
writePackedInts(docCount);
} else {
writeFixedInts(docCount);
byte headerType = typeOrd;
if (typeOrd == PACKED) {
final long delta = maxValue - minValue;
// if we exceed the range of positive longs we must switch to fixed ints
if (delta <= ( maxValue >= 0 && minValue <= 0 ? Long.MAX_VALUE : Long.MAX_VALUE -1) && delta >= 0) {
writePackedInts(datOut, docCount);
return;
}
headerType = FIXED_64;
}
datOut.writeByte(headerType);
array.write(datOut, docCount);
success = true;
} finally {
datOut.close();
bytesUsed
.addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG * docToValue.length));
docToValue = null;
IOUtils.closeSafely(!success, datOut);
array.clear();
}
}
private void writeFixedInts(int docCount) throws IOException {
datOut.writeByte(FIXED);
datOut.writeInt(docCount);
for (int i = 0; i < docToValue.length; i++) {
datOut.writeLong(docToValue[i]); // write full array - we use 0 as default
}
for (int i = docToValue.length; i < docCount; i++) {
datOut.writeLong(0); // fill with defaults values
}
}
private void writePackedInts(int docCount) throws IOException {
private void writePackedInts(IndexOutput datOut, int docCount) throws IOException {
datOut.writeByte(PACKED);
datOut.writeLong(minValue);
assert array.type() == ValueType.FIXED_INTS_64;
final long[] docToValue = (long[])array.getArray();
// write a default value to recognize docs without a value for that
// field
final long defaultValue = maxValue>= 0 && minValue <=0 ? 0-minValue : ++maxValue-minValue;
final long defaultValue = maxValue >= 0 && minValue <= 0 ? 0 - minValue
: ++maxValue - minValue;
datOut.writeLong(defaultValue);
PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
PackedInts.bitsRequired(maxValue-minValue));
final int limit = docToValue.length > docCount ? docCount : docToValue.length;
PackedInts.bitsRequired(maxValue - minValue));
final int limit = docToValue.length > docCount ? docCount
: docToValue.length;
for (int i = 0; i < limit; i++) {
w.add(docToValue[i] == 0 ? defaultValue : docToValue[i] - minValue);
}
@ -183,7 +197,7 @@ class IntsImpl {
*/
static class IntsReader extends IndexDocValues {
private final IndexInput datIn;
private final boolean packed;
private final byte type;
protected IntsReader(Directory dir, String id) throws IOException {
datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
@ -191,7 +205,7 @@ class IntsImpl {
boolean success = false;
try {
CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
packed = PACKED == datIn.readByte();
type = datIn.readByte();
success = true;
} finally {
if (!success) {
@ -206,101 +220,22 @@ class IntsImpl {
*/
@Override
public Source load() throws IOException {
final IndexInput input = (IndexInput) datIn.clone();
boolean success = false;
final Source source;
IndexInput input = null;
try {
final Source source = packed ? new PackedIntsSource(input)
: new FixedIntsSource(input);
input = (IndexInput) datIn.clone();
input.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
source = loadFixedSource(type, input);
success = true;
return source;
} finally {
if (!success) {
IOUtils.closeSafely(true, datIn);
IOUtils.closeSafely(true, input, datIn);
}
}
}
private static class FixedIntsSource extends Source {
private final long[] values;
public FixedIntsSource(IndexInput dataIn) throws IOException {
dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
final int numDocs = dataIn.readInt();
values = new long[numDocs];
for (int i = 0; i < values.length; i++) {
values[i] = dataIn.readLong();
}
}
@Override
public long getInt(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
public ValueType type() {
return ValueType.INTS;
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource)
throws IOException {
return new SourceEnum(attrSource, type(), this, values.length) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs)
return pos = NO_MORE_DOCS;
intsRef.ints[intsRef.offset] = values[target];
return pos = target;
}
};
}
}
private static class PackedIntsSource extends Source {
private final long minValue;
private final long defaultValue;
private final PackedInts.Reader values;
public PackedIntsSource(IndexInput dataIn) throws IOException {
dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
minValue = dataIn.readLong();
defaultValue = dataIn.readLong();
values = PackedInts.getReader(dataIn);
}
@Override
public long getInt(int docID) {
// TODO -- can we somehow avoid 2X method calls
// on each get? must push minValue down, and make
// PackedInts implement Ints.Source
assert docID >= 0;
final long value = values.get(docID);
return value == defaultValue ? 0 : minValue + value;
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource)
throws IOException {
return new SourceEnum(attrSource, type(), this, values.size()) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs)
return pos = NO_MORE_DOCS;
intsRef.ints[intsRef.offset] = source.getInt(target);
return pos = target;
}
};
}
@Override
public ValueType type() {
return ValueType.INTS;
}
}
@Override
public void close() throws IOException {
super.close();
@ -312,8 +247,8 @@ class IntsImpl {
final IndexInput input = (IndexInput) datIn.clone();
boolean success = false;
try {
ValuesEnum inst = packed ? new PackedIntsEnumImpl(source, input)
: new FixedIntsEnumImpl(source, input);
input.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
final ValuesEnum inst = directEnum(type, source, input);
success = true;
return inst;
} finally {
@ -325,11 +260,86 @@ class IntsImpl {
@Override
public ValueType type() {
return ValueType.INTS;
return ValueType.VAR_INTS;
}
}
/**
 * Creates an enum that reads values directly from <code>input</code> for the
 * given type ordinal.
 *
 * @throws IllegalStateException
 *           if <code>ord</code> is not a known type ordinal
 */
private static ValuesEnum directEnum(byte ord, AttributeSource attrSource, IndexInput input) throws IOException {
  final IndexDocValuesArray fixedArray;
  switch (ord) {
    case PACKED:
      return new PackedIntsEnumImpl(attrSource, input);
    case FIXED_8:
      fixedArray = new ByteValues((AtomicLong) null);
      break;
    case FIXED_16:
      fixedArray = new ShortValues((AtomicLong) null);
      break;
    case FIXED_32:
      fixedArray = new IntValues((AtomicLong) null);
      break;
    case FIXED_64:
      fixedArray = new LongValues((AtomicLong) null);
      break;
    default:
      throw new IllegalStateException("unknown type ordinal " + ord);
  }
  return fixedArray.getDirectEnum(attrSource, input);
}
/**
 * Loads the in-memory {@link IndexDocValues.Source} matching the given type
 * ordinal from <code>input</code>.
 *
 * @throws IllegalStateException
 *           if <code>ord</code> is not a known type ordinal
 */
private static IndexDocValues.Source loadFixedSource(byte ord, IndexInput input) throws IOException {
  final IndexDocValues.Source loaded;
  switch (ord) {
    case PACKED:
      loaded = new PackedIntsSource(input);
      break;
    case FIXED_8:
      loaded = new ByteValues(input);
      break;
    case FIXED_16:
      loaded = new ShortValues(input);
      break;
    case FIXED_32:
      loaded = new IntValues(input);
      break;
    case FIXED_64:
      loaded = new LongValues(input);
      break;
    default:
      throw new IllegalStateException("unknown type ordinal " + ord);
  }
  return loaded;
}
/**
 * {@link Source} over packed ints: values are stored as offsets from a
 * minimum value, and one reserved offset marks the default value
 * <code>0</code>.
 */
static class PackedIntsSource extends Source {
  private final long minValue;
  private final long defaultValue;
  private final PackedInts.Reader values;

  /** Reads the minimum value, the default marker and the packed values from <code>dataIn</code>. */
  public PackedIntsSource(IndexInput dataIn) throws IOException {
    minValue = dataIn.readLong();
    defaultValue = dataIn.readLong();
    values = PackedInts.getReader(dataIn);
  }

  @Override
  public long getInt(int docID) {
    // TODO -- can we somehow avoid 2X method calls
    // on each get? must push minValue down, and make
    // PackedInts implement Ints.Source
    assert docID >= 0;
    final long packed = values.get(docID);
    if (packed == defaultValue) {
      return 0;
    }
    return minValue + packed;
  }

  @Override
  public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
    final int valueCount = values.size();
    return new SourceEnum(attrSource, type(), this, valueCount) {
      @Override
      public int advance(int target) throws IOException {
        if (target < numDocs) {
          intsRef.ints[intsRef.offset] = source.getInt(target);
          pos = target;
        } else {
          pos = NO_MORE_DOCS;
        }
        return pos;
      }
    };
  }

  @Override
  public ValueType type() {
    return ValueType.VAR_INTS;
  }
}
private static final class PackedIntsEnumImpl extends ValuesEnum {
private final PackedInts.ReaderIterator ints;
private long minValue;
@ -340,10 +350,9 @@ class IntsImpl {
private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
throws IOException {
super(source, ValueType.INTS);
super(source, ValueType.VAR_INTS);
intsRef.offset = 0;
this.dataIn = dataIn;
dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
minValue = dataIn.readLong();
defaultValue = dataIn.readLong();
this.ints = PackedInts.getReaderIterator(dataIn);
@ -380,50 +389,6 @@ class IntsImpl {
}
}
/**
 * {@link ValuesEnum} that reads one fixed-width 64 bit value per document
 * directly from an {@link IndexInput}, seeking over documents that are
 * skipped by {@link #advance(int)}.
 */
private static final class FixedIntsEnumImpl extends ValuesEnum {
  private final IndexInput dataIn;
  private final int maxDoc;
  private int pos = -1;

  /**
   * Positions the input behind the codec header plus one further byte and
   * reads the document count that precedes the values.
   *
   * @param source attribute source handed through to {@link ValuesEnum}
   * @param dataIn input to read from; closed by {@link #close()}
   * @throws IOException if seeking or reading the input fails
   */
  private FixedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
      throws IOException {
    super(source, ValueType.INTS);
    intsRef.offset = 0;
    this.dataIn = dataIn;
    // NOTE(review): the extra byte after the header is presumably a format
    // marker written by the matching writer - confirm
    dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
    maxDoc = dataIn.readInt();
  }

  /** Closes the underlying input. */
  @Override
  public void close() throws IOException {
    dataIn.close();
  }

  /**
   * Moves to {@code target}, which must be greater than the current
   * position, and loads its value into {@code intsRef}.
   *
   * @return {@code target}, or {@code NO_MORE_DOCS} if {@code target} is
   *         not below the document count
   */
  @Override
  public int advance(int target) throws IOException {
    if (target >= maxDoc) {
      return pos = NO_MORE_DOCS;
    }
    assert target > pos;
    if (target > pos + 1) {
      // multiply as long: an int product overflows once more than 2^28
      // documents are skipped and would seek to a bogus file position
      final long skippedBytes = (target - pos - 1) * 8L;
      dataIn.seek(dataIn.getFilePointer() + skippedBytes);
    }
    intsRef.ints[intsRef.offset] = dataIn.readLong();
    return pos = target;
  }

  /** Returns the current position; {@code -1} before the first advance. */
  @Override
  public int docID() {
    return pos;
  }

  /**
   * Steps to the document following the current one.
   *
   * @return the new position, or {@code NO_MORE_DOCS} when exhausted
   */
  @Override
  public int nextDoc() throws IOException {
    if (pos >= maxDoc) {
      return pos = NO_MORE_DOCS;
    }
    return advance(pos + 1);
  }
}
}

View File

@ -18,8 +18,8 @@ package org.apache.lucene.index.values;
*/
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.PackedInts;
@ -32,16 +32,14 @@ import org.apache.lucene.util.packed.PackedInts;
* @lucene.experimental
*/
public enum ValueType {
/*
* TODO: Add INT_32 INT_64 INT_16 & INT_8?!
*/
/**
* A 64 bit integer value. By default this type uses
* A variable bit signed integer value. By default this type uses
* {@link PackedInts} to compress the values, as an offset
* from the minimum value, as long as the value range
* fits into 2<sup>63</sup>-1. Otherwise,
* the default implementation falls back to fixed size 64bit
* integers.
* integers ({@link #FIXED_INTS_64}).
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
@ -50,13 +48,65 @@ public enum ValueType {
* value assigned. Custom default values must be assigned explicitly.
* </p>
*/
INTS,
VAR_INTS,
/**
* An 8 bit signed integer value. {@link Source} instances of
* this type return a <tt>byte</tt> array from {@link Source#getArray()}
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
* documents without an explicit value will use <tt>0</tt> instead. In turn,
* {@link ValuesEnum} instances will not skip documents without an explicit
* value assigned. Custom default values must be assigned explicitly.
* </p>
*/
FIXED_INTS_8,
/**
* A 16 bit signed integer value. {@link Source} instances of
* this type return a <tt>short</tt> array from {@link Source#getArray()}
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
* documents without an explicit value will use <tt>0</tt> instead. In turn,
* {@link ValuesEnum} instances will not skip documents without an explicit
* value assigned. Custom default values must be assigned explicitly.
* </p>
*/
FIXED_INTS_16,
/**
* A 32 bit signed integer value. {@link Source} instances of
* this type return an <tt>int</tt> array from {@link Source#getArray()}
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
* documents without an explicit value will use <tt>0</tt> instead. In turn,
* {@link ValuesEnum} instances will not skip documents without an explicit
* value assigned. Custom default values must be assigned explicitly.
* </p>
*/
FIXED_INTS_32,
/**
* A 64 bit signed integer value. {@link Source} instances of
* this type return a <tt>long</tt> array from {@link Source#getArray()}
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
* documents without an explicit value will use <tt>0</tt> instead. In turn,
* {@link ValuesEnum} instances will not skip documents without an explicit
* value assigned. Custom default values must be assigned explicitly.
* </p>
*/
FIXED_INTS_64,
/**
* A 32 bit floating point value. By default there is no compression
* applied. To fit custom float values into less than 32bit either a custom
* implementation is needed or values must be encoded into a
* {@link #BYTES_FIXED_STRAIGHT} type.
* {@link #BYTES_FIXED_STRAIGHT} type. {@link Source} instances of
* this type return a <tt>float</tt> array from {@link Source#getArray()}
* <p>
* NOTE: this type uses <tt>0.0f</tt> as the default value without any
* distinction between provided <tt>0.0f</tt> values during indexing. All
@ -67,10 +117,12 @@ public enum ValueType {
*/
FLOAT_32,
/**
*
* A 64 bit floating point value. By default there is no compression
* applied. To fit custom float values into less than 64bit either a custom
* implementation is needed or values must be encoded into a
* {@link #BYTES_FIXED_STRAIGHT} type.
* {@link #BYTES_FIXED_STRAIGHT} type. {@link Source} instances of
* this type return a <tt>double</tt> array from {@link Source#getArray()}
* <p>
* NOTE: this type uses <tt>0.0d</tt> as the default value without any
* distinction between provided <tt>0.0d</tt> values during indexing. All

View File

@ -71,13 +71,18 @@ public abstract class ValuesEnum extends DocIdSetIterator {
case BYTES_VAR_STRAIGHT:
bytesRef = new BytesRef();
break;
case INTS:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
intsRef = new LongsRef(1);
break;
case FLOAT_32:
case FLOAT_64:
floatsRef = new FloatsRef(1);
break;
}
}

View File

@ -197,8 +197,12 @@ public abstract class Writer extends DocValuesConsumer {
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
}
switch (type) {
case INTS:
return Ints.getWriter(directory, id, true, bytesUsed);
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getWriter(directory, id, bytesUsed, type);
case FLOAT_32:
return Floats.getWriter(directory, id, 4, bytesUsed);
case FLOAT_64:
@ -221,6 +225,7 @@ public abstract class Writer extends DocValuesConsumer {
case BYTES_VAR_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false,
bytesUsed);
default:
throw new IllegalArgumentException("Unknown Values: " + type);
}

View File

@ -59,7 +59,7 @@ public class NumericIndexDocValueSource extends ValueSource {
}
};
case INTS:
case VAR_INTS:
return new DocValues() {
@Override
public String toString(int doc) {

View File

@ -200,9 +200,21 @@ public class RandomIndexWriter implements Closeable {
case FLOAT_64:
docValuesField.setFloat(random.nextDouble());
break;
case INTS:
case VAR_INTS:
docValuesField.setInt(random.nextLong());
break;
case FIXED_INTS_16:
docValuesField.setInt(random.nextInt(Short.MAX_VALUE));
break;
case FIXED_INTS_32:
docValuesField.setInt(random.nextInt());
break;
case FIXED_INTS_64:
docValuesField.setInt(random.nextLong());
break;
case FIXED_INTS_8:
docValuesField.setInt(random.nextInt(128));
break;
default:
throw new IllegalArgumentException("no such type: " + type);
}

View File

@ -170,76 +170,241 @@ public class TestDocValues extends LuceneTestCase {
dir.close();
}
public void testInts() throws IOException {
long[] maxMin = new long[] {
Long.MIN_VALUE, Long.MAX_VALUE,
1, Long.MAX_VALUE,
0, Long.MAX_VALUE,
-1, Long.MAX_VALUE,
Long.MIN_VALUE, -1,
random.nextInt(), random.nextInt() };
for (int j = 0; j < maxMin.length; j+=2) {
long maxV = 1;
final int NUM_VALUES = 777 + random.nextInt(777);
final long[] values = new long[NUM_VALUES];
for (int rx = 1; rx < 63; rx++, maxV *= 2) {
Directory dir = newDirectory();
final AtomicLong trackBytes = new AtomicLong(0);
Writer w = Ints.getWriter(dir, "test", false, trackBytes);
values[0] = maxMin[j];
w.add(0, values[0]);
values[1] = maxMin[j+1];
w.add(1, values[1]);
for (int i = 2; i < NUM_VALUES; i++) {
final long v = random.nextLong() % (1 + maxV);
values[i] = v;
w.add(i, v);
}
final int additionalDocs = 1 + random.nextInt(9);
w.finish(NUM_VALUES + additionalDocs);
assertEquals(0, trackBytes.get());
public void testVariableIntsLimits() throws IOException {
long[][] minMax = new long[][] { { Long.MIN_VALUE, Long.MAX_VALUE },
{ Long.MIN_VALUE + 1, 1 }, { -1, Long.MAX_VALUE },
{ Long.MIN_VALUE, -1 }, { 1, Long.MAX_VALUE },
{ -1, Long.MAX_VALUE - 1 }, { Long.MIN_VALUE + 2, 1 }, };
ValueType[] expectedTypes = new ValueType[] { ValueType.FIXED_INTS_64,
ValueType.FIXED_INTS_64, ValueType.FIXED_INTS_64,
ValueType.FIXED_INTS_64, ValueType.VAR_INTS, ValueType.VAR_INTS,
ValueType.VAR_INTS, };
for (int i = 0; i < minMax.length; i++) {
Directory dir = newDirectory();
final AtomicLong trackBytes = new AtomicLong(0);
Writer w = Ints.getWriter(dir, "test", trackBytes, ValueType.VAR_INTS);
w.add(0, minMax[i][0]);
w.add(1, minMax[i][1]);
w.finish(2);
assertEquals(0, trackBytes.get());
IndexDocValues r = Ints.getValues(dir, "test");
Source source = getSource(r);
assertEquals(i + " with min: " + minMax[i][0] + " max: " + minMax[i][1],
expectedTypes[i], source.type());
assertEquals(minMax[i][0], source.getInt(0));
assertEquals(minMax[i][1], source.getInt(1));
ValuesEnum iEnum = getEnum(r);
assertEquals(i + " with min: " + minMax[i][0] + " max: " + minMax[i][1],
expectedTypes[i], iEnum.type());
assertEquals(0, iEnum.nextDoc());
assertEquals(minMax[i][0], iEnum.intsRef.get());
assertEquals(1, iEnum.nextDoc());
assertEquals(minMax[i][1], iEnum.intsRef.get());
assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
IndexDocValues r = Ints.getValues(dir, "test", false);
for (int iter = 0; iter < 2; iter++) {
Source s = getSource(r);
for (int i = 0; i < NUM_VALUES; i++) {
final long v = s.getInt(i);
assertEquals("index " + i, values[i], v);
}
}
r.close();
dir.close();
}
}
for (int iter = 0; iter < 2; iter++) {
ValuesEnum iEnum = getEnum(r);
LongsRef ints = iEnum.getInt();
for (int i = 0; i < NUM_VALUES + additionalDocs; i++) {
assertEquals(i, iEnum.nextDoc());
if (i < NUM_VALUES) {
assertEquals(values[i], ints.get());
} else {
assertEquals(0, ints.get());
}
}
assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
iEnum.close();
}
/**
* Runs the shared integer round-trip check for {@link ValueType#VAR_INTS},
* passing 63 as the <code>maxBit</code> loop bound.
*/
public void testVInts() throws IOException {
testInts(ValueType.VAR_INTS, 63);
}
for (int iter = 0; iter < 2; iter++) {
ValuesEnum iEnum = getEnum(r);
LongsRef ints = iEnum.getInt();
for (int i = 0; i < NUM_VALUES + additionalDocs; i += 1 + random.nextInt(25)) {
assertEquals(i, iEnum.advance(i));
if (i < NUM_VALUES) {
assertEquals(values[i], ints.get());
} else {
assertEquals(0, ints.get());
}
}
assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.advance(NUM_VALUES + additionalDocs));
iEnum.close();
}
r.close();
dir.close();
/**
* Runs the shared integer round-trip check once per fixed-width type. Each
* call passes a <code>maxBit</code> bound one less than the type's width in
* bits (63, 31, 15 and 7).
*/
public void testFixedInts() throws IOException {
testInts(ValueType.FIXED_INTS_64, 63);
testInts(ValueType.FIXED_INTS_32, 31);
testInts(ValueType.FIXED_INTS_16, 15);
testInts(ValueType.FIXED_INTS_8, 7);
}
/**
 * Writes three values as {@link ValueType#FIXED_INTS_8} and checks that the
 * loaded {@link Source} reports a backing array which is a
 * <code>byte[]</code> holding the same values.
 */
public void testGetInt8Array() throws IOException {
  byte[] sourceArray = new byte[] {1,2,3};
  Directory dir = newDirectory();
  final AtomicLong trackBytes = new AtomicLong(0);
  Writer w = Ints.getWriter(dir, "test", trackBytes, ValueType.FIXED_INTS_8);
  for (int i = 0; i < sourceArray.length; i++) {
    w.add(i, (long) sourceArray[i]);
  }
  w.finish(sourceArray.length);
  IndexDocValues r = Ints.getValues(dir, "test");
  Source source = r.getSource();
  assertTrue(source.hasArray());
  byte[] loaded = ((byte[])source.getArray());
  // expected value goes first so a failure reports the lengths the right way round
  assertEquals(sourceArray.length, loaded.length);
  for (int i = 0; i < loaded.length; i++) {
    assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i]);
  }
  r.close();
  dir.close();
}
/**
 * Writes three values as {@link ValueType#FIXED_INTS_16} and checks that
 * the loaded {@link Source} reports a backing array which is a
 * <code>short[]</code> holding the same values.
 */
public void testGetInt16Array() throws IOException {
  short[] sourceArray = new short[] {1,2,3};
  Directory dir = newDirectory();
  final AtomicLong trackBytes = new AtomicLong(0);
  Writer w = Ints.getWriter(dir, "test", trackBytes, ValueType.FIXED_INTS_16);
  for (int i = 0; i < sourceArray.length; i++) {
    w.add(i, (long) sourceArray[i]);
  }
  w.finish(sourceArray.length);
  IndexDocValues r = Ints.getValues(dir, "test");
  Source source = r.getSource();
  assertTrue(source.hasArray());
  short[] loaded = ((short[])source.getArray());
  // expected value goes first so a failure reports the lengths the right way round
  assertEquals(sourceArray.length, loaded.length);
  for (int i = 0; i < loaded.length; i++) {
    assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i]);
  }
  r.close();
  dir.close();
}
/**
 * Writes three values as {@link ValueType#FIXED_INTS_64} and checks that
 * the loaded {@link Source} reports a backing array which is a
 * <code>long[]</code> holding the same values.
 */
public void testGetInt64Array() throws IOException {
  long[] sourceArray = new long[] {1,2,3};
  Directory dir = newDirectory();
  final AtomicLong trackBytes = new AtomicLong(0);
  Writer w = Ints.getWriter(dir, "test", trackBytes, ValueType.FIXED_INTS_64);
  for (int i = 0; i < sourceArray.length; i++) {
    w.add(i, sourceArray[i]);
  }
  w.finish(sourceArray.length);
  IndexDocValues r = Ints.getValues(dir, "test");
  Source source = r.getSource();
  assertTrue(source.hasArray());
  long[] loaded = ((long[])source.getArray());
  // expected value goes first so a failure reports the lengths the right way round
  assertEquals(sourceArray.length, loaded.length);
  for (int i = 0; i < loaded.length; i++) {
    assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i]);
  }
  r.close();
  dir.close();
}
/**
 * Writes three values as {@link ValueType#FIXED_INTS_32} and checks that
 * the loaded {@link Source} reports a backing array which is an
 * <code>int[]</code> holding the same values.
 */
public void testGetInt32Array() throws IOException {
  int[] sourceArray = new int[] {1,2,3};
  Directory dir = newDirectory();
  final AtomicLong trackBytes = new AtomicLong(0);
  Writer w = Ints.getWriter(dir, "test", trackBytes, ValueType.FIXED_INTS_32);
  for (int i = 0; i < sourceArray.length; i++) {
    w.add(i, (long) sourceArray[i]);
  }
  w.finish(sourceArray.length);
  IndexDocValues r = Ints.getValues(dir, "test");
  Source source = r.getSource();
  assertTrue(source.hasArray());
  int[] loaded = ((int[])source.getArray());
  // expected value goes first so a failure reports the lengths the right way round
  assertEquals(sourceArray.length, loaded.length);
  for (int i = 0; i < loaded.length; i++) {
    assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i]);
  }
  r.close();
  dir.close();
}
/**
 * Writes three values with the 4 byte float writer and checks that the
 * loaded {@link Source} reports a backing array which is a
 * <code>float[]</code> holding exactly the same values.
 */
public void testGetFloat32Array() throws IOException {
  float[] sourceArray = new float[] {1,2,3};
  Directory dir = newDirectory();
  final AtomicLong trackBytes = new AtomicLong(0);
  Writer w = Floats.getWriter(dir, "test", 4, trackBytes);
  for (int i = 0; i < sourceArray.length; i++) {
    w.add(i, sourceArray[i]);
  }
  w.finish(sourceArray.length);
  IndexDocValues r = Floats.getValues(dir, "test", 3);
  Source source = r.getSource();
  assertTrue(source.hasArray());
  float[] loaded = ((float[])source.getArray());
  // expected value goes first so a failure reports the lengths the right way round
  assertEquals(sourceArray.length, loaded.length);
  for (int i = 0; i < loaded.length; i++) {
    // delta of 0 demands an exact round trip
    assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i], 0.0f);
  }
  r.close();
  dir.close();
}
/**
 * Writes three values with the 8 byte float writer and checks that the
 * loaded {@link Source} reports a backing array which is a
 * <code>double[]</code> holding exactly the same values.
 */
public void testGetFloat64Array() throws IOException {
  double[] sourceArray = new double[] {1,2,3};
  Directory dir = newDirectory();
  final AtomicLong trackBytes = new AtomicLong(0);
  Writer w = Floats.getWriter(dir, "test", 8, trackBytes);
  for (int i = 0; i < sourceArray.length; i++) {
    w.add(i, sourceArray[i]);
  }
  w.finish(sourceArray.length);
  IndexDocValues r = Floats.getValues(dir, "test", 3);
  Source source = r.getSource();
  assertTrue(source.hasArray());
  double[] loaded = ((double[])source.getArray());
  // expected value goes first so a failure reports the lengths the right way round
  assertEquals(sourceArray.length, loaded.length);
  for (int i = 0; i < loaded.length; i++) {
    // delta of 0 demands an exact round trip
    assertEquals("value didn't match at index " + i, sourceArray[i], loaded[i], 0.0d);
  }
  r.close();
  dir.close();
}
/**
* Writes random values with a writer of the given type and verifies them
* through the {@link Source} as well as through {@link ValuesEnum}, using
* both <code>nextDoc()</code> and <code>advance()</code>.
* <p>
* The whole round trip is repeated once per iteration of the outer loop;
* the magnitude bound <code>maxV</code> doubles on every iteration.
*
* @param type the {@link ValueType} handed to the writer and expected back
* from the source and the enum
* @param maxBit exclusive upper bound of the outer loop counter, which
* starts at 1
*/
private void testInts(ValueType type, int maxBit) throws IOException {
long maxV = 1;
final int NUM_VALUES = 333 + random.nextInt(333);
final long[] values = new long[NUM_VALUES];
for (int rx = 1; rx < maxBit; rx++, maxV *= 2) {
Directory dir = newDirectory();
final AtomicLong trackBytes = new AtomicLong(0);
Writer w = Ints.getWriter(dir, "test", trackBytes, type);
for (int i = 0; i < NUM_VALUES; i++) {
// the remainder keeps |v| <= maxV; the sign follows nextLong(), so v may be negative
final long v = random.nextLong() % (1 + maxV);
values[i] = v;
w.add(i, v);
}
// finish with more docs than values were added for; the checks below
// expect those trailing docs to read as 0
final int additionalDocs = 1 + random.nextInt(9);
w.finish(NUM_VALUES + additionalDocs);
// the byte tracker is expected to be back at 0 once the writer has finished
assertEquals(0, trackBytes.get());
IndexDocValues r = Ints.getValues(dir, "test");
// obtain the source twice and verify every written value each time
for (int iter = 0; iter < 2; iter++) {
Source s = getSource(r);
assertEquals(type, s.type());
for (int i = 0; i < NUM_VALUES; i++) {
final long v = s.getInt(i);
assertEquals("index " + i, values[i], v);
}
}
// sequential iteration: every doc up to the finished doc count must be visited
for (int iter = 0; iter < 2; iter++) {
ValuesEnum iEnum = getEnum(r);
assertEquals(type, iEnum.type());
LongsRef ints = iEnum.getInt();
for (int i = 0; i < NUM_VALUES + additionalDocs; i++) {
assertEquals(i, iEnum.nextDoc());
if (i < NUM_VALUES) {
assertEquals(values[i], ints.get());
} else {
assertEquals(0, ints.get());
}
}
assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
iEnum.close();
}
// skipping iteration: advance in random strides of 1 to 25 docs
for (int iter = 0; iter < 2; iter++) {
ValuesEnum iEnum = getEnum(r);
assertEquals(type, iEnum.type());
LongsRef ints = iEnum.getInt();
for (int i = 0; i < NUM_VALUES + additionalDocs; i += 1 + random.nextInt(25)) {
assertEquals(i, iEnum.advance(i));
if (i < NUM_VALUES) {
assertEquals(values[i], ints.get());
} else {
assertEquals(0, ints.get());
}
}
// advancing to the finished doc count itself must exhaust the enum
assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.advance(NUM_VALUES + additionalDocs));
iEnum.close();
}
r.close();
dir.close();
}
}
}

View File

@ -113,44 +113,20 @@ public class TestDocValuesIndexing extends LuceneTestCase {
dir.close();
}
/**
* Tests complete indexing of {@link ValueType} including deletions, merging and
* sparse value fields on Compound-File
*/
public void testIndexBytesNoDeletesCFS() throws IOException {
runTestIndexBytes(writerConfig(true), false);
}
public void testIndexBytesDeletesCFS() throws IOException {
runTestIndexBytes(writerConfig(true), true);
}
public void testIndexNumericsNoDeletesCFS() throws IOException {
runTestNumerics(writerConfig(true), false);
}
public void testIndexNumericsDeletesCFS() throws IOException {
runTestNumerics(writerConfig(true), true);
}
/**
* Tests complete indexing of {@link ValueType} including deletions, merging and
* sparse value fields on Non-Compound-File
*/
public void testIndexBytesNoDeletes() throws IOException {
runTestIndexBytes(writerConfig(false), false);
runTestIndexBytes(writerConfig(random.nextBoolean()), false);
}
public void testIndexBytesDeletes() throws IOException {
runTestIndexBytes(writerConfig(false), true);
runTestIndexBytes(writerConfig(random.nextBoolean()), true);
}
public void testIndexNumericsNoDeletes() throws IOException {
runTestNumerics(writerConfig(false), false);
runTestNumerics(writerConfig(random.nextBoolean()), false);
}
public void testIndexNumericsDeletes() throws IOException {
runTestNumerics(writerConfig(false), true);
runTestNumerics(writerConfig(random.nextBoolean()), true);
}
public void testAddIndexes() throws IOException {
@ -204,7 +180,11 @@ public class TestDocValuesIndexing extends LuceneTestCase {
case BYTES_VAR_STRAIGHT:
case FLOAT_32:
case FLOAT_64:
case INTS:
case VAR_INTS:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
assertEquals(msg, valuesPerIndex-1, vE_2_merged.advance(valuesPerIndex-1));
}
@ -246,7 +226,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
throws IOException {
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, cfg);
final int numValues = 179 + random.nextInt(151);
final int numValues = 50 + atLeast(10);
final List<ValueType> numVariantList = new ArrayList<ValueType>(NUMERICS);
// run in random order to test if fill works correctly during merges
@ -258,8 +238,16 @@ public class TestDocValuesIndexing extends LuceneTestCase {
IndexReader r = IndexReader.open(w, true);
final int numRemainingValues = (int) (numValues - deleted.cardinality());
final int base = r.numDocs() - numRemainingValues;
// for FIXED_INTS_8 we use value mod 128 - to enable testing in
// one go we simply use numValues as the mod for all other INT types
int mod = numValues;
switch (val) {
case INTS: {
case FIXED_INTS_8:
mod = 128;
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case VAR_INTS: {
IndexDocValues intsReader = getDocValues(r, val.name());
assertNotNull(intsReader);
@ -283,8 +271,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
}
assertEquals("advance failed at index: " + i + " of " + r.numDocs()
+ " docs", i, intsEnum.advance(i));
assertEquals(expected, ints.getInt(i));
assertEquals(expected, enumRef.get());
assertEquals(val + "" + mod + " " + i, expected%mod, ints.getInt(i));
assertEquals(expected%mod, enumRef.get());
}
}
@ -338,11 +326,11 @@ public class TestDocValuesIndexing extends LuceneTestCase {
final List<ValueType> byteVariantList = new ArrayList<ValueType>(BYTES);
// run in random order to test if fill works correctly during merges
Collections.shuffle(byteVariantList, random);
final int numValues = 179 + random.nextInt(151);
final int numValues = 50 + atLeast(10);
for (ValueType byteIndexValue : byteVariantList) {
List<Closeable> closeables = new ArrayList<Closeable>();
int bytesSize = 1 + random.nextInt(128);
int bytesSize = 1 + atLeast(10);
OpenBitSet deleted = indexValues(w, numValues, byteIndexValue,
byteVariantList, withDeletions, bytesSize);
final IndexReader r = IndexReader.open(w, withDeletions);
@ -485,8 +473,12 @@ public class TestDocValuesIndexing extends LuceneTestCase {
ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_FIXED_STRAIGHT, ValueType.BYTES_VAR_DEREF,
ValueType.BYTES_VAR_SORTED, ValueType.BYTES_VAR_STRAIGHT);
private static EnumSet<ValueType> NUMERICS = EnumSet.of(ValueType.INTS,
ValueType.FLOAT_32, ValueType.FLOAT_64);
private static EnumSet<ValueType> NUMERICS = EnumSet.of(ValueType.VAR_INTS,
ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,
ValueType.FIXED_INTS_64,
ValueType.FIXED_INTS_8,
ValueType.FLOAT_32,
ValueType.FLOAT_64);
private static Index[] IDX_VALUES = new Index[] { Index.ANALYZED,
Index.ANALYZED_NO_NORMS, Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS,
@ -517,8 +509,20 @@ public class TestDocValuesIndexing extends LuceneTestCase {
for (int i = 0; i < numValues; i++) {
if (isNumeric) {
switch (value) {
case INTS:
valField.setInt(i);
case VAR_INTS:
valField.setInt((long)i);
break;
case FIXED_INTS_16:
valField.setInt((short)i, random.nextInt(10) != 0);
break;
case FIXED_INTS_32:
valField.setInt(i, random.nextInt(10) != 0);
break;
case FIXED_INTS_64:
valField.setInt((long)i, random.nextInt(10) != 0);
break;
case FIXED_INTS_8:
valField.setInt((byte)(0xFF & (i % 128)), random.nextInt(10) != 0);
break;
case FLOAT_32:
valField.setFloat(2.0f * i);
@ -526,6 +530,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
case FLOAT_64:
valField.setFloat(2.0d * i);
break;
default:
fail("unexpected value " + value);
}

View File

@ -124,7 +124,7 @@ public class TestSort extends LuceneTestCase {
doc.add (new Field ("contents", data[i][1], Field.Store.NO, Field.Index.ANALYZED));
if (data[i][2] != null) {
Field f = supportsDocValues ?
IndexDocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.INTS)
IndexDocValuesField.set(new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED), ValueType.VAR_INTS)
: new Field ("int", data[i][2], Field.Store.NO, Field.Index.NOT_ANALYZED);
doc.add(f);
}