Field data: Try to load short values as byte values and load int values as short or byte values to reduce the size they take in memory.

This commit is contained in:
Martijn van Groningen 2013-04-12 19:11:18 +02:00
parent 5fbd4a12a0
commit 80dbca0809
4 changed files with 210 additions and 15 deletions

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.fielddata.plain;
import gnu.trove.iterator.TIntIterator;
import gnu.trove.list.array.TIntArrayList;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
@ -42,7 +43,7 @@ import org.elasticsearch.index.settings.IndexSettings;
/**
*/
public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomicFieldData> implements IndexNumericFieldData<IntArrayAtomicFieldData> {
public class IntArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
public static class Builder implements IndexFieldData.Builder {
@ -69,7 +70,7 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomi
}
@Override
public IntArrayAtomicFieldData load(AtomicReaderContext context) {
public AtomicNumericFieldData load(AtomicReaderContext context) {
try {
return cache.load(context, this);
} catch (Throwable e) {
@ -82,7 +83,7 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomi
}
@Override
public IntArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
AtomicReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
if (terms == null) {
@ -94,12 +95,61 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomi
values.add(0); // first "t" indicates null value
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
try {
BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
BytesRef term;
int max = Integer.MIN_VALUE;
int min = Integer.MAX_VALUE;
BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
while ((term = iter.next()) != null) {
values.add(NumericUtils.prefixCodedToInt(term));
int value = NumericUtils.prefixCodedToInt(term);
values.add(value);
if (value > max) {
max = value;
}
if (value < min) {
min = value;
}
}
Ordinals build = builder.build(fieldDataType.getSettings());
if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
// if we can fit all our values in a byte or short we should do this!
if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
@Override
public byte get(int index) {
return (byte) values.get(index);
}
@Override
public byte[] toArray() {
byte[] bValues = new byte[values.size()];
int i = 0;
for (TIntIterator it = values.iterator(); it.hasNext(); ) {
bValues[i++] = (byte) it.next();
}
return bValues;
}
});
} else if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
return ShortArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ShortArrayIndexFieldData.BuilderShorts() {
@Override
public short get(int index) {
return (short) values.get(index);
}
@Override
public short[] toArray() {
short[] sValues = new short[values.size()];
int i = 0;
for (TIntIterator it = values.iterator(); it.hasNext(); ) {
sValues[i++] = (short) it.next();
}
return sValues;
}
});
}
}
return build(reader, fieldDataType, builder, build, new BuilderIntegers() {
@Override
public int get(int index) {

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.fielddata.plain;
import gnu.trove.iterator.TShortIterator;
import gnu.trove.list.array.TShortArrayList;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
@ -42,7 +43,7 @@ import org.elasticsearch.index.settings.IndexSettings;
/**
*/
public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayAtomicFieldData> implements IndexNumericFieldData<ShortArrayAtomicFieldData> {
public class ShortArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
public static class Builder implements IndexFieldData.Builder {
@ -69,7 +70,7 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayA
}
@Override
public ShortArrayAtomicFieldData load(AtomicReaderContext context) {
public AtomicNumericFieldData load(AtomicReaderContext context) {
try {
return cache.load(context, this);
} catch (Throwable e) {
@ -82,7 +83,7 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayA
}
@Override
public ShortArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
AtomicReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
if (terms == null) {
@ -94,13 +95,44 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayA
values.add((short) 0); // first "t" indicates null value
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
try {
BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
BytesRef term;
short max = Short.MIN_VALUE;
short min = Short.MAX_VALUE;
BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
while ((term = iter.next()) != null) {
values.add((short) NumericUtils.prefixCodedToInt(term));
short value = (short) NumericUtils.prefixCodedToInt(term);
values.add(value);
if (value > max) {
max = value;
}
if (value < min) {
min = value;
}
}
Ordinals build = builder.build(fieldDataType.getSettings());
if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
// if we can fit all our values in a byte we should do this!
if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
@Override
public byte get(int index) {
return (byte) values.get(index);
}
@Override
public byte[] toArray() {
byte[] bValues = new byte[values.size()];
int i = 0;
for (TShortIterator it = values.iterator(); it.hasNext(); ) {
bValues[i++] = (byte) it.next();
}
return bValues;
}
});
}
}
return build(reader, fieldDataType, builder, build, new BuilderShorts() {
@Override
public short get(int index) {

View File

@ -19,11 +19,19 @@
package org.elasticsearch.test.unit.index.fielddata;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.*;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.IntArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
/**
*/
@ -34,6 +42,63 @@ public class IntFieldDataTests extends NumericFieldDataTests {
return new FieldDataType("int");
}
/**
 * Verifies that an "int" field whose values all fit in a byte is loaded as
 * {@link ByteArrayAtomicFieldData} and still returns the original values.
 *
 * The documents are indexed with {@link IntField} and the field data is requested
 * with the "int" type so that the int loader (and its narrowing branch) is the
 * code under test, rather than the long loader.
 */
@Test
public void testOptimizeTypeByte() throws Exception {
    // doc 0 holds the largest byte value
    Document d = new Document();
    d.add(new StringField("_id", "1", Field.Store.NO));
    d.add(new IntField("value", Byte.MAX_VALUE, Field.Store.NO));
    writer.addDocument(d);

    // doc 1 holds the smallest byte value
    d = new Document();
    d.add(new StringField("_id", "2", Field.Store.NO));
    d.add(new IntField("value", Byte.MIN_VALUE, Field.Store.NO));
    writer.addDocument(d);

    IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("int"));
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    // both extremes fit in a byte, so the narrowest representation is expected
    assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE));
    assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE));
}
/**
 * Verifies that an "int" field whose values exceed the byte range but fit in a
 * short is loaded as {@link ShortArrayAtomicFieldData} and still returns the
 * original values.
 *
 * The documents are indexed with {@link IntField} and the field data is requested
 * with the "int" type so that the int loader is the code under test, rather than
 * the long loader.
 */
@Test
public void testOptimizeTypeShort() throws Exception {
    // doc 0 holds the largest short value
    Document d = new Document();
    d.add(new StringField("_id", "1", Field.Store.NO));
    d.add(new IntField("value", Short.MAX_VALUE, Field.Store.NO));
    writer.addDocument(d);

    // doc 1 holds the smallest short value
    d = new Document();
    d.add(new StringField("_id", "2", Field.Store.NO));
    d.add(new IntField("value", Short.MIN_VALUE, Field.Store.NO));
    writer.addDocument(d);

    IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("int"));
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    // too wide for a byte, but a short is sufficient
    assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE));
    assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE));
}
/**
 * Verifies that an "int" field whose values need the full 32-bit range is not
 * narrowed: it is loaded as {@link IntArrayAtomicFieldData} and returns the
 * original values.
 *
 * The "int" type is spelled out (instead of using the class's configured field
 * data type) because subclasses that test narrower types inherit this method and
 * would otherwise truncate the indexed values.
 */
@Test
public void testOptimizeTypeInteger() throws Exception {
    // doc 0 holds the largest int value
    Document d = new Document();
    d.add(new StringField("_id", "1", Field.Store.NO));
    d.add(new IntField("value", Integer.MAX_VALUE, Field.Store.NO));
    writer.addDocument(d);

    // doc 1 holds the smallest int value
    d = new Document();
    d.add(new StringField("_id", "2", Field.Store.NO));
    d.add(new IntField("value", Integer.MIN_VALUE, Field.Store.NO));
    writer.addDocument(d);

    IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("int"));
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    // neither byte nor short can hold these values, so no narrowing is expected
    assertThat(fieldData, instanceOf(IntArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE));
    assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE));
}
@Override
protected void fillSingleValueAllSet() throws Exception {
Document d = new Document();

View File

@ -21,9 +21,19 @@ package org.elasticsearch.test.unit.index.fielddata;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
/**
*/
@ -34,4 +44,42 @@ public class ShortFieldDataTests extends IntFieldDataTests {
return new FieldDataType("short");
}
/**
 * Verifies that a "short" field whose values all fit in a byte is loaded as
 * {@link ByteArrayAtomicFieldData} and still returns the original values.
 *
 * The documents are indexed with {@link IntField} (32-bit terms) and the field
 * data is requested with the "short" type so that the short loader is the code
 * under test, rather than the long loader.
 */
@Test
public void testOptimizeTypeByte() throws Exception {
    // doc 0 holds the largest byte value
    Document d = new Document();
    d.add(new StringField("_id", "1", Field.Store.NO));
    d.add(new IntField("value", Byte.MAX_VALUE, Field.Store.NO));
    writer.addDocument(d);

    // doc 1 holds the smallest byte value
    d = new Document();
    d.add(new StringField("_id", "2", Field.Store.NO));
    d.add(new IntField("value", Byte.MIN_VALUE, Field.Store.NO));
    writer.addDocument(d);

    IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("short"));
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    // both extremes fit in a byte, so the narrowest representation is expected
    assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE));
    assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE));
}
/**
 * Verifies that a "short" field whose values need the full short range is not
 * narrowed to bytes: it is loaded as {@link ShortArrayAtomicFieldData} and
 * returns the original values.
 *
 * The documents are indexed with {@link IntField} (32-bit terms) and the field
 * data is requested with the "short" type so that the short loader is the code
 * under test, rather than the long loader.
 */
@Test
public void testOptimizeTypeShort() throws Exception {
    // doc 0 holds the largest short value
    Document d = new Document();
    d.add(new StringField("_id", "1", Field.Store.NO));
    d.add(new IntField("value", Short.MAX_VALUE, Field.Store.NO));
    writer.addDocument(d);

    // doc 1 holds the smallest short value
    d = new Document();
    d.add(new StringField("_id", "2", Field.Store.NO));
    d.add(new IntField("value", Short.MIN_VALUE, Field.Store.NO));
    writer.addDocument(d);

    IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("short"));
    AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());

    // too wide for a byte, so the short representation must be kept
    assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class));
    assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE));
    assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE));
}
}