Field Data: optimize long type to use narrowest possible type automatically

closes #2795
This commit is contained in:
Shay Banon 2013-03-18 12:37:15 +01:00
parent 82072fc47f
commit 7d9cef904b
5 changed files with 268 additions and 60 deletions

View File

@ -100,6 +100,29 @@ public class ByteArrayIndexFieldData extends AbstractIndexFieldData<ByteArrayAto
}
try {
Ordinals build = builder.build(fieldDataType.getSettings());
return build(reader, builder, build, new BuilderBytes() {
@Override
public byte get(int index) {
return values.get(index);
}
@Override
public byte[] toArray() {
return values.toArray();
}
});
} finally {
builder.close();
}
}
/**
 * Source of byte values handed to {@code build(...)}. It lets callers supply values
 * that are not held in a byte list; for example, {@code LongArrayIndexFieldData}
 * passes an implementation that casts its collected long values down to bytes.
 */
static interface BuilderBytes {
/** Returns the value at {@code index} (presumably the value's ordinal — confirm against {@code build}). */
byte get(int index);
/** Returns all values as a byte array. */
byte[] toArray();
}
static ByteArrayAtomicFieldData build(AtomicReader reader, OrdinalsBuilder builder, Ordinals build, BuilderBytes values) {
if (!build.isMultiValued()) {
Docs ordinals = build.ordinals();
byte[] sValues = new byte[reader.maxDoc()];
@ -114,13 +137,7 @@ public class ByteArrayIndexFieldData extends AbstractIndexFieldData<ByteArrayAto
return new ByteArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
}
} else {
return new ByteArrayAtomicFieldData.WithOrdinals(
values.toArray(new byte[values.size()]),
reader.maxDoc(),
build);
}
} finally {
builder.close();
return new ByteArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
}
}

View File

@ -100,6 +100,29 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomi
values.add(NumericUtils.prefixCodedToInt(term));
}
Ordinals build = builder.build(fieldDataType.getSettings());
return build(reader, builder, build, new BuilderIntegers() {
@Override
public int get(int index) {
return values.get(index);
}
@Override
public int[] toArray() {
return values.toArray();
}
});
} finally {
builder.close();
}
}
/**
 * Source of int values handed to {@code build(...)}. It lets callers supply values
 * that are not held in an int list; for example, {@code LongArrayIndexFieldData}
 * passes an implementation that casts its collected long values down to ints.
 */
static interface BuilderIntegers {
/** Returns the value at {@code index} (presumably the value's ordinal — confirm against {@code build}). */
int get(int index);
/** Returns all values as an int array. */
int[] toArray();
}
static IntArrayAtomicFieldData build(AtomicReader reader, OrdinalsBuilder builder, Ordinals build, BuilderIntegers values) {
if (!build.isMultiValued()) {
Docs ordinals = build.ordinals();
int[] sValues = new int[reader.maxDoc()];
@ -115,13 +138,10 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomi
}
} else {
return new IntArrayAtomicFieldData.WithOrdinals(
values.toArray(new int[values.size()]),
values.toArray(),
reader.maxDoc(),
build);
}
} finally {
builder.close();
}
}
@Override

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.fielddata.plain;
import gnu.trove.iterator.TLongIterator;
import gnu.trove.list.array.TLongArrayList;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
@ -42,7 +43,7 @@ import org.elasticsearch.index.settings.IndexSettings;
/**
*/
public class LongArrayIndexFieldData extends AbstractIndexFieldData<LongArrayAtomicFieldData> implements IndexNumericFieldData<LongArrayAtomicFieldData> {
public class LongArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
public static class Builder implements IndexFieldData.Builder {
@ -69,7 +70,7 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData<LongArrayAto
}
@Override
public LongArrayAtomicFieldData load(AtomicReaderContext context) {
public AtomicNumericFieldData load(AtomicReaderContext context) {
try {
return cache.load(context, this);
} catch (Throwable e) {
@ -82,7 +83,7 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData<LongArrayAto
}
@Override
public LongArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
AtomicReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
if (terms == null) {
@ -96,10 +97,76 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData<LongArrayAto
try {
BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric64Bit(terms.iterator(null)), reader.getLiveDocs());
BytesRef term;
long max = Long.MIN_VALUE;
long min = Long.MAX_VALUE;
while ((term = iter.next()) != null) {
values.add(NumericUtils.prefixCodedToLong(term));
long value = NumericUtils.prefixCodedToLong(term);
values.add(value);
if (value > max) {
max = value;
}
if (value < min) {
min = value;
}
}
Ordinals build = builder.build(fieldDataType.getSettings());
if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
// since the default mapping for numeric is long, it's worth optimizing the actual type used to represent the data
if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
return ByteArrayIndexFieldData.build(reader, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
@Override
public byte get(int index) {
return (byte) values.get(index);
}
@Override
public byte[] toArray() {
byte[] bValues = new byte[values.size()];
int i = 0;
for (TLongIterator it = values.iterator(); it.hasNext(); ) {
bValues[i++] = (byte) it.next();
}
return bValues;
}
});
} else if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
return ShortArrayIndexFieldData.build(reader, builder, build, new ShortArrayIndexFieldData.BuilderShorts() {
@Override
public short get(int index) {
return (short) values.get(index);
}
@Override
public short[] toArray() {
short[] sValues = new short[values.size()];
int i = 0;
for (TLongIterator it = values.iterator(); it.hasNext(); ) {
sValues[i++] = (short) it.next();
}
return sValues;
}
});
} else if (min >= Integer.MIN_VALUE && max <= Integer.MAX_VALUE) {
return IntArrayIndexFieldData.build(reader, builder, build, new IntArrayIndexFieldData.BuilderIntegers() {
@Override
public int get(int index) {
return (int) values.get(index);
}
@Override
public int[] toArray() {
int[] iValues = new int[values.size()];
int i = 0;
for (TLongIterator it = values.iterator(); it.hasNext(); ) {
iValues[i++] = (int) it.next();
}
return iValues;
}
});
}
}
if (!build.isMultiValued()) {
Docs ordinals = build.ordinals();
long[] sValues = new long[reader.maxDoc()];
@ -114,10 +181,7 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData<LongArrayAto
return new LongArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
}
} else {
return new LongArrayAtomicFieldData.WithOrdinals(
values.toArray(new long[values.size()]),
reader.maxDoc(),
build);
return new LongArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
}
} finally {
builder.close();

View File

@ -101,6 +101,29 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayA
}
Ordinals build = builder.build(fieldDataType.getSettings());
return build(reader, builder, build, new BuilderShorts() {
@Override
public short get(int index) {
return values.get(index);
}
@Override
public short[] toArray() {
return values.toArray();
}
});
} finally {
builder.close();
}
}
/**
 * Source of short values handed to {@code build(...)}. It lets callers supply values
 * that are not held in a short list; for example, {@code LongArrayIndexFieldData}
 * passes an implementation that casts its collected long values down to shorts.
 */
static interface BuilderShorts {
/** Returns the value at {@code index} (presumably the value's ordinal — confirm against {@code build}). */
short get(int index);
/** Returns all values as a short array. */
short[] toArray();
}
static ShortArrayAtomicFieldData build(AtomicReader reader, OrdinalsBuilder builder, Ordinals build, BuilderShorts values) {
if (!build.isMultiValued()) {
Docs ordinals = build.ordinals();
short[] sValues = new short[reader.maxDoc()];
@ -115,13 +138,7 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayA
return new ShortArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
}
} else {
return new ShortArrayAtomicFieldData.WithOrdinals(
values.toArray(new short[values.size()]),
reader.maxDoc(),
build);
}
} finally {
builder.close();
return new ShortArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
}
}

View File

@ -23,7 +23,20 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.IntArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.LongArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
/**
*/
@ -31,7 +44,84 @@ public class LongFieldDataTests extends NumericFieldDataTests {
/**
 * Returns the "long" field data type with the {@code optimize_type} setting turned
 * off, so that a long field is not narrowed to byte/short/int backed field data.
 */
@Override
protected FieldDataType getFieldDataType() {
    // we don't want to optimize the type so it will always be a long...
    // (a bare FieldDataType("long") would leave optimize_type at its default of true)
    return new FieldDataType("long", ImmutableSettings.builder().put("optimize_type", false));
}
/**
 * A long field whose values span exactly the byte range must load as byte-backed
 * field data when the field data type uses its default settings, and must still
 * report the original values through the long view.
 */
@Test
public void testOptimizeTypeByte() throws Exception {
    final long upper = Byte.MAX_VALUE;
    final long lower = Byte.MIN_VALUE;

    // doc 0 carries the upper bound
    Document upperDoc = new Document();
    upperDoc.add(new StringField("_id", "1", Field.Store.NO));
    upperDoc.add(new LongField("value", upper, Field.Store.NO));
    writer.addDocument(upperDoc);

    // doc 1 carries the lower bound
    Document lowerDoc = new Document();
    lowerDoc.add(new StringField("_id", "2", Field.Store.NO));
    lowerDoc.add(new LongField("value", lower, Field.Store.NO));
    writer.addDocument(lowerDoc);

    IndexNumericFieldData indexFieldData =
            ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo(upper));
    assertThat(fieldData.getLongValues().getValue(1), equalTo(lower));
}
/**
 * A long field whose values span exactly the short range must load as short-backed
 * field data when the field data type uses its default settings, and must still
 * report the original values through the long view.
 */
@Test
public void testOptimizeTypeShort() throws Exception {
    final long upper = Short.MAX_VALUE;
    final long lower = Short.MIN_VALUE;

    // doc 0 carries the upper bound
    Document upperDoc = new Document();
    upperDoc.add(new StringField("_id", "1", Field.Store.NO));
    upperDoc.add(new LongField("value", upper, Field.Store.NO));
    writer.addDocument(upperDoc);

    // doc 1 carries the lower bound
    Document lowerDoc = new Document();
    lowerDoc.add(new StringField("_id", "2", Field.Store.NO));
    lowerDoc.add(new LongField("value", lower, Field.Store.NO));
    writer.addDocument(lowerDoc);

    IndexNumericFieldData indexFieldData =
            ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo(upper));
    assertThat(fieldData.getLongValues().getValue(1), equalTo(lower));
}
/**
 * A long field whose values span exactly the int range must load as int-backed
 * field data when the field data type uses its default settings, and must still
 * report the original values through the long view.
 */
@Test
public void testOptimizeTypeInteger() throws Exception {
    final long upper = Integer.MAX_VALUE;
    final long lower = Integer.MIN_VALUE;

    // doc 0 carries the upper bound
    Document upperDoc = new Document();
    upperDoc.add(new StringField("_id", "1", Field.Store.NO));
    upperDoc.add(new LongField("value", upper, Field.Store.NO));
    writer.addDocument(upperDoc);

    // doc 1 carries the lower bound
    Document lowerDoc = new Document();
    lowerDoc.add(new StringField("_id", "2", Field.Store.NO));
    lowerDoc.add(new LongField("value", lower, Field.Store.NO));
    writer.addDocument(lowerDoc);

    IndexNumericFieldData indexFieldData =
            ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    assertThat(fieldData, instanceOf(IntArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo(upper));
    assertThat(fieldData.getLongValues().getValue(1), equalTo(lower));
}
/**
 * A long field holding values just outside the int range cannot be narrowed and
 * must load as long-backed field data, even with the default settings.
 */
@Test
public void testOptimizeTypeLong() throws Exception {
    final long aboveInt = Integer.MAX_VALUE + 1L;
    final long belowInt = Integer.MIN_VALUE - 1L;

    // doc 0 carries the value one past Integer.MAX_VALUE
    Document aboveDoc = new Document();
    aboveDoc.add(new StringField("_id", "1", Field.Store.NO));
    aboveDoc.add(new LongField("value", aboveInt, Field.Store.NO));
    writer.addDocument(aboveDoc);

    // doc 1 carries the value one below Integer.MIN_VALUE
    Document belowDoc = new Document();
    belowDoc.add(new StringField("_id", "2", Field.Store.NO));
    belowDoc.add(new LongField("value", belowInt, Field.Store.NO));
    writer.addDocument(belowDoc);

    IndexNumericFieldData indexFieldData =
            ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    assertThat(fieldData, instanceOf(LongArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo(aboveInt));
    assertThat(fieldData.getLongValues().getValue(1), equalTo(belowInt));
}
@Override