Field Data: optimize long type to use narrowest possible type automatically

closes #2795
This commit is contained in:
Shay Banon 2013-03-18 12:37:15 +01:00
parent 82072fc47f
commit 7d9cef904b
5 changed files with 268 additions and 60 deletions

View File

@ -100,30 +100,47 @@ public class ByteArrayIndexFieldData extends AbstractIndexFieldData<ByteArrayAto
} }
try { try {
Ordinals build = builder.build(fieldDataType.getSettings()); Ordinals build = builder.build(fieldDataType.getSettings());
if (!build.isMultiValued()) { return build(reader, builder, build, new BuilderBytes() {
Docs ordinals = build.ordinals(); @Override
byte[] sValues = new byte[reader.maxDoc()]; public byte get(int index) {
int maxDoc = reader.maxDoc(); return values.get(index);
for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i));
} }
final FixedBitSet set = builder.buildDocsWithValuesSet();
if (set == null) { @Override
return new ByteArrayAtomicFieldData.Single(sValues, reader.maxDoc()); public byte[] toArray() {
} else { return values.toArray();
return new ByteArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
} }
} else { });
return new ByteArrayAtomicFieldData.WithOrdinals(
values.toArray(new byte[values.size()]),
reader.maxDoc(),
build);
}
} finally { } finally {
builder.close(); builder.close();
} }
} }
/**
 * Source of byte values handed to {@code build(...)}, so the same construction
 * logic can be fed from different backing collections.
 */
static interface BuilderBytes {
// Returns the value stored at the given position; build(...) calls this with a per-document ordinal.
byte get(int index);
// Returns all values as a single array; build(...) passes it to the WithOrdinals field data.
byte[] toArray();
}
/**
 * Creates the byte-backed atomic field data for one reader.
 * <p>
 * Multi-valued ordinals are wrapped together with the full value array. For
 * single-valued ordinals a per-document array is materialised by resolving each
 * document's ordinal through {@code values}.
 *
 * @param reader  reader whose {@code maxDoc()} sizes the result
 * @param builder ordinals builder, asked for the docs-with-values bit set
 * @param build   the ordinals produced by {@code builder}
 * @param values  accessor over the collected values
 * @return the field data variant matching the ordinals layout
 */
static ByteArrayAtomicFieldData build(AtomicReader reader, OrdinalsBuilder builder, Ordinals build, BuilderBytes values) {
    // Guard clause: multi-valued fields keep the ordinals indirection.
    if (build.isMultiValued()) {
        return new ByteArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
    }

    final Docs docOrdinals = build.ordinals();
    final int docCount = reader.maxDoc();
    final byte[] perDocValues = new byte[docCount];
    for (int docId = 0; docId < docCount; docId++) {
        final int ord = docOrdinals.getOrd(docId);
        perDocValues[docId] = values.get(ord);
    }

    // A null set selects the plain Single variant (presumably: no document is missing a value -- confirm in OrdinalsBuilder).
    final FixedBitSet docsWithValues = builder.buildDocsWithValuesSet();
    if (docsWithValues != null) {
        return new ByteArrayAtomicFieldData.SingleFixedSet(perDocValues, docCount, docsWithValues);
    }
    return new ByteArrayAtomicFieldData.Single(perDocValues, docCount);
}
@Override @Override
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) { public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
return new ByteValuesComparatorSource(this, missingValue, sortMode); return new ByteValuesComparatorSource(this, missingValue, sortMode);

View File

@ -100,30 +100,50 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomi
values.add(NumericUtils.prefixCodedToInt(term)); values.add(NumericUtils.prefixCodedToInt(term));
} }
Ordinals build = builder.build(fieldDataType.getSettings()); Ordinals build = builder.build(fieldDataType.getSettings());
if (!build.isMultiValued()) { return build(reader, builder, build, new BuilderIntegers() {
Docs ordinals = build.ordinals(); @Override
int[] sValues = new int[reader.maxDoc()]; public int get(int index) {
int maxDoc = reader.maxDoc(); return values.get(index);
for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i));
} }
final FixedBitSet set = builder.buildDocsWithValuesSet();
if (set == null) { @Override
return new IntArrayAtomicFieldData.Single(sValues, reader.maxDoc()); public int[] toArray() {
} else { return values.toArray();
return new IntArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
} }
} else { });
return new IntArrayAtomicFieldData.WithOrdinals(
values.toArray(new int[values.size()]),
reader.maxDoc(),
build);
}
} finally { } finally {
builder.close(); builder.close();
} }
} }
/**
 * Source of int values handed to {@code build(...)}, so the same construction
 * logic can be fed from different backing collections.
 */
static interface BuilderIntegers {
// Returns the value stored at the given position; build(...) calls this with a per-document ordinal.
int get(int index);
// Returns all values as a single array; build(...) passes it to the WithOrdinals field data.
int[] toArray();
}
/**
 * Creates the int-backed atomic field data for one reader.
 * <p>
 * Multi-valued ordinals are wrapped together with the full value array. For
 * single-valued ordinals a per-document array is materialised by resolving each
 * document's ordinal through {@code values}.
 *
 * @param reader  reader whose {@code maxDoc()} sizes the result
 * @param builder ordinals builder, asked for the docs-with-values bit set
 * @param build   the ordinals produced by {@code builder}
 * @param values  accessor over the collected values
 * @return the field data variant matching the ordinals layout
 */
static IntArrayAtomicFieldData build(AtomicReader reader, OrdinalsBuilder builder, Ordinals build, BuilderIntegers values) {
    // Guard clause: multi-valued fields keep the ordinals indirection.
    if (build.isMultiValued()) {
        return new IntArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
    }

    final Docs docOrdinals = build.ordinals();
    final int docCount = reader.maxDoc();
    final int[] perDocValues = new int[docCount];
    for (int docId = 0; docId < docCount; docId++) {
        final int ord = docOrdinals.getOrd(docId);
        perDocValues[docId] = values.get(ord);
    }

    // A null set selects the plain Single variant (presumably: no document is missing a value -- confirm in OrdinalsBuilder).
    final FixedBitSet docsWithValues = builder.buildDocsWithValuesSet();
    if (docsWithValues != null) {
        return new IntArrayAtomicFieldData.SingleFixedSet(perDocValues, docCount, docsWithValues);
    }
    return new IntArrayAtomicFieldData.Single(perDocValues, docCount);
}
@Override @Override
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) { public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
return new IntValuesComparatorSource(this, missingValue, sortMode); return new IntValuesComparatorSource(this, missingValue, sortMode);

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.fielddata.plain; package org.elasticsearch.index.fielddata.plain;
import gnu.trove.iterator.TLongIterator;
import gnu.trove.list.array.TLongArrayList; import gnu.trove.list.array.TLongArrayList;
import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.AtomicReaderContext;
@ -42,7 +43,7 @@ import org.elasticsearch.index.settings.IndexSettings;
/** /**
*/ */
public class LongArrayIndexFieldData extends AbstractIndexFieldData<LongArrayAtomicFieldData> implements IndexNumericFieldData<LongArrayAtomicFieldData> { public class LongArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
public static class Builder implements IndexFieldData.Builder { public static class Builder implements IndexFieldData.Builder {
@ -69,7 +70,7 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData<LongArrayAto
} }
@Override @Override
public LongArrayAtomicFieldData load(AtomicReaderContext context) { public AtomicNumericFieldData load(AtomicReaderContext context) {
try { try {
return cache.load(context, this); return cache.load(context, this);
} catch (Throwable e) { } catch (Throwable e) {
@ -82,7 +83,7 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData<LongArrayAto
} }
@Override @Override
public LongArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception { public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
AtomicReader reader = context.reader(); AtomicReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName()); Terms terms = reader.terms(getFieldNames().indexName());
if (terms == null) { if (terms == null) {
@ -96,10 +97,76 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData<LongArrayAto
try { try {
BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric64Bit(terms.iterator(null)), reader.getLiveDocs()); BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric64Bit(terms.iterator(null)), reader.getLiveDocs());
BytesRef term; BytesRef term;
long max = Long.MIN_VALUE;
long min = Long.MAX_VALUE;
while ((term = iter.next()) != null) { while ((term = iter.next()) != null) {
values.add(NumericUtils.prefixCodedToLong(term)); long value = NumericUtils.prefixCodedToLong(term);
values.add(value);
if (value > max) {
max = value;
}
if (value < min) {
min = value;
}
} }
Ordinals build = builder.build(fieldDataType.getSettings()); Ordinals build = builder.build(fieldDataType.getSettings());
if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
// since the default mapping for numeric is long, it's worth optimizing the actual type used to represent the data
if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
return ByteArrayIndexFieldData.build(reader, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
@Override
public byte get(int index) {
return (byte) values.get(index);
}
@Override
public byte[] toArray() {
byte[] bValues = new byte[values.size()];
int i = 0;
for (TLongIterator it = values.iterator(); it.hasNext(); ) {
bValues[i++] = (byte) it.next();
}
return bValues;
}
});
} else if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
return ShortArrayIndexFieldData.build(reader, builder, build, new ShortArrayIndexFieldData.BuilderShorts() {
@Override
public short get(int index) {
return (short) values.get(index);
}
@Override
public short[] toArray() {
short[] sValues = new short[values.size()];
int i = 0;
for (TLongIterator it = values.iterator(); it.hasNext(); ) {
sValues[i++] = (short) it.next();
}
return sValues;
}
});
} else if (min >= Integer.MIN_VALUE && max <= Integer.MAX_VALUE) {
return IntArrayIndexFieldData.build(reader, builder, build, new IntArrayIndexFieldData.BuilderIntegers() {
@Override
public int get(int index) {
return (int) values.get(index);
}
@Override
public int[] toArray() {
int[] iValues = new int[values.size()];
int i = 0;
for (TLongIterator it = values.iterator(); it.hasNext(); ) {
iValues[i++] = (int) it.next();
}
return iValues;
}
});
}
}
if (!build.isMultiValued()) { if (!build.isMultiValued()) {
Docs ordinals = build.ordinals(); Docs ordinals = build.ordinals();
long[] sValues = new long[reader.maxDoc()]; long[] sValues = new long[reader.maxDoc()];
@ -114,10 +181,7 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData<LongArrayAto
return new LongArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set); return new LongArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
} }
} else { } else {
return new LongArrayAtomicFieldData.WithOrdinals( return new LongArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
values.toArray(new long[values.size()]),
reader.maxDoc(),
build);
} }
} finally { } finally {
builder.close(); builder.close();

View File

@ -101,30 +101,47 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayA
} }
Ordinals build = builder.build(fieldDataType.getSettings()); Ordinals build = builder.build(fieldDataType.getSettings());
if (!build.isMultiValued()) { return build(reader, builder, build, new BuilderShorts() {
Docs ordinals = build.ordinals(); @Override
short[] sValues = new short[reader.maxDoc()]; public short get(int index) {
int maxDoc = reader.maxDoc(); return values.get(index);
for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i));
} }
final FixedBitSet set = builder.buildDocsWithValuesSet();
if (set == null) { @Override
return new ShortArrayAtomicFieldData.Single(sValues, reader.maxDoc()); public short[] toArray() {
} else { return values.toArray();
return new ShortArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
} }
} else { });
return new ShortArrayAtomicFieldData.WithOrdinals(
values.toArray(new short[values.size()]),
reader.maxDoc(),
build);
}
} finally { } finally {
builder.close(); builder.close();
} }
} }
/**
 * Source of short values handed to {@code build(...)}, so the same construction
 * logic can be fed from different backing collections.
 */
static interface BuilderShorts {
// Returns the value stored at the given position; build(...) calls this with a per-document ordinal.
short get(int index);
// Returns all values as a single array; build(...) passes it to the WithOrdinals field data.
short[] toArray();
}
/**
 * Creates the short-backed atomic field data for one reader.
 * <p>
 * Multi-valued ordinals are wrapped together with the full value array. For
 * single-valued ordinals a per-document array is materialised by resolving each
 * document's ordinal through {@code values}.
 *
 * @param reader  reader whose {@code maxDoc()} sizes the result
 * @param builder ordinals builder, asked for the docs-with-values bit set
 * @param build   the ordinals produced by {@code builder}
 * @param values  accessor over the collected values
 * @return the field data variant matching the ordinals layout
 */
static ShortArrayAtomicFieldData build(AtomicReader reader, OrdinalsBuilder builder, Ordinals build, BuilderShorts values) {
    // Guard clause: multi-valued fields keep the ordinals indirection.
    if (build.isMultiValued()) {
        return new ShortArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
    }

    final Docs docOrdinals = build.ordinals();
    final int docCount = reader.maxDoc();
    final short[] perDocValues = new short[docCount];
    for (int docId = 0; docId < docCount; docId++) {
        final int ord = docOrdinals.getOrd(docId);
        perDocValues[docId] = values.get(ord);
    }

    // A null set selects the plain Single variant (presumably: no document is missing a value -- confirm in OrdinalsBuilder).
    final FixedBitSet docsWithValues = builder.buildDocsWithValuesSet();
    if (docsWithValues != null) {
        return new ShortArrayAtomicFieldData.SingleFixedSet(perDocValues, docCount, docsWithValues);
    }
    return new ShortArrayAtomicFieldData.Single(perDocValues, docCount);
}
@Override @Override
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) { public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
return new ShortValuesComparatorSource(this, missingValue, sortMode); return new ShortValuesComparatorSource(this, missingValue, sortMode);

View File

@ -23,7 +23,20 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldDataType; import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.IntArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.LongArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
/** /**
*/ */
@ -31,7 +44,84 @@ public class LongFieldDataTests extends NumericFieldDataTests {
@Override @Override
protected FieldDataType getFieldDataType() { protected FieldDataType getFieldDataType() {
return new FieldDataType("long"); // we don't want to optimize the type so it will always be a long...
return new FieldDataType("long", ImmutableSettings.builder().put("optimize_type", false));
}
/**
 * Indexes the two extremes of the byte range into a long field and checks that,
 * with default field data settings, the loaded data is byte-backed while still
 * reporting the original values as longs.
 */
@Test
public void testOptimizeTypeByte() throws Exception {
    Document upperBoundDoc = new Document();
    upperBoundDoc.add(new StringField("_id", "1", Field.Store.NO));
    upperBoundDoc.add(new LongField("value", Byte.MAX_VALUE, Field.Store.NO));
    writer.addDocument(upperBoundDoc);

    Document lowerBoundDoc = new Document();
    lowerBoundDoc.add(new StringField("_id", "2", Field.Store.NO));
    lowerBoundDoc.add(new LongField("value", Byte.MIN_VALUE, Field.Store.NO));
    writer.addDocument(lowerBoundDoc);

    // No explicit settings here, so the type optimization is left at its default.
    FieldMapper.Names fieldNames = new FieldMapper.Names("value");
    FieldDataType defaultLongType = new FieldDataType("long");
    IndexNumericFieldData indexFieldData = ifdService.getForField(fieldNames, defaultLongType);
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE));
    assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE));
}
/**
 * Indexes the two extremes of the short range into a long field and checks that,
 * with default field data settings, the loaded data is short-backed while still
 * reporting the original values as longs.
 */
@Test
public void testOptimizeTypeShort() throws Exception {
    Document upperBoundDoc = new Document();
    upperBoundDoc.add(new StringField("_id", "1", Field.Store.NO));
    upperBoundDoc.add(new LongField("value", Short.MAX_VALUE, Field.Store.NO));
    writer.addDocument(upperBoundDoc);

    Document lowerBoundDoc = new Document();
    lowerBoundDoc.add(new StringField("_id", "2", Field.Store.NO));
    lowerBoundDoc.add(new LongField("value", Short.MIN_VALUE, Field.Store.NO));
    writer.addDocument(lowerBoundDoc);

    // No explicit settings here, so the type optimization is left at its default.
    FieldMapper.Names fieldNames = new FieldMapper.Names("value");
    FieldDataType defaultLongType = new FieldDataType("long");
    IndexNumericFieldData indexFieldData = ifdService.getForField(fieldNames, defaultLongType);
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE));
    assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE));
}
/**
 * Indexes the two extremes of the int range into a long field and checks that,
 * with default field data settings, the loaded data is int-backed while still
 * reporting the original values as longs.
 */
@Test
public void testOptimizeTypeInteger() throws Exception {
    Document upperBoundDoc = new Document();
    upperBoundDoc.add(new StringField("_id", "1", Field.Store.NO));
    upperBoundDoc.add(new LongField("value", Integer.MAX_VALUE, Field.Store.NO));
    writer.addDocument(upperBoundDoc);

    Document lowerBoundDoc = new Document();
    lowerBoundDoc.add(new StringField("_id", "2", Field.Store.NO));
    lowerBoundDoc.add(new LongField("value", Integer.MIN_VALUE, Field.Store.NO));
    writer.addDocument(lowerBoundDoc);

    // No explicit settings here, so the type optimization is left at its default.
    FieldMapper.Names fieldNames = new FieldMapper.Names("value");
    FieldDataType defaultLongType = new FieldDataType("long");
    IndexNumericFieldData indexFieldData = ifdService.getForField(fieldNames, defaultLongType);
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    assertThat(fieldData, instanceOf(IntArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE));
    assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE));
}
@Test
public void testOptimizeTypeLong() throws Exception {
Document d = new Document();
d.add(new StringField("_id", "1", Field.Store.NO));
d.add(new LongField("value", Integer.MAX_VALUE + 1l, Field.Store.NO));
writer.addDocument(d);
d = new Document();
d.add(new StringField("_id", "2", Field.Store.NO));
d.add(new LongField("value", Integer.MIN_VALUE - 1l, Field.Store.NO));
writer.addDocument(d);
IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
assertThat(fieldData, instanceOf(LongArrayAtomicFieldData.class));
assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE + 1l));
assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE - 1l));
} }
@Override @Override