From 80dbca0809c92000a493ce0187a17ad4377b9064 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 12 Apr 2013 19:11:18 +0200 Subject: [PATCH] Field data: Try to load short values as byte values and load int values as short or byte values to reduce the size they take in memory. --- .../plain/IntArrayIndexFieldData.java | 60 +++++++++++++-- .../plain/ShortArrayIndexFieldData.java | 42 +++++++++-- .../index/fielddata/IntFieldDataTests.java | 73 ++++++++++++++++++- .../index/fielddata/ShortFieldDataTests.java | 50 ++++++++++++- 4 files changed, 210 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayIndexFieldData.java index 03c5dc6dcb2..133043f51cd 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayIndexFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.fielddata.plain; +import gnu.trove.iterator.TIntIterator; import gnu.trove.list.array.TIntArrayList; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; @@ -42,7 +43,7 @@ import org.elasticsearch.index.settings.IndexSettings; /** */ -public class IntArrayIndexFieldData extends AbstractIndexFieldData implements IndexNumericFieldData { +public class IntArrayIndexFieldData extends AbstractIndexFieldData implements IndexNumericFieldData { public static class Builder implements IndexFieldData.Builder { @@ -69,7 +70,7 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData max) { + max = value; + } + if (value < min) { + min = value; + } } + Ordinals build = builder.build(fieldDataType.getSettings()); + if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) { + // if we can fit all our values in a byte or short we should do this! + if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) { + return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() { + @Override + public byte get(int index) { + return (byte) values.get(index); + } + + @Override + public byte[] toArray() { + byte[] bValues = new byte[values.size()]; + int i = 0; + for (TIntIterator it = values.iterator(); it.hasNext(); ) { + bValues[i++] = (byte) it.next(); + } + return bValues; + } + }); + } else if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) { + return ShortArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ShortArrayIndexFieldData.BuilderShorts() { + @Override + public short get(int index) { + return (short) values.get(index); + } + + @Override + public short[] toArray() { + short[] sValues = new short[values.size()]; + int i = 0; + for (TIntIterator it = values.iterator(); it.hasNext(); ) { + sValues[i++] = (short) it.next(); + } + return sValues; + } + }); + } + } + return build(reader, fieldDataType, builder, build, new BuilderIntegers() { @Override public int get(int index) { diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/ShortArrayIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/ShortArrayIndexFieldData.java index 9c0a15170ff..814e4010973 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/ShortArrayIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/ShortArrayIndexFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.fielddata.plain; +import gnu.trove.iterator.TShortIterator; import gnu.trove.list.array.TShortArrayList; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; @@ -42,7 +43,7 @@ import org.elasticsearch.index.settings.IndexSettings; /** */ -public class ShortArrayIndexFieldData extends AbstractIndexFieldData implements IndexNumericFieldData { +public class ShortArrayIndexFieldData extends AbstractIndexFieldData implements IndexNumericFieldData { public static class Builder implements IndexFieldData.Builder { @@ -69,7 +70,7 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData max) { + max = value; + } + if (value < min) { + min = value; + } } Ordinals build = builder.build(fieldDataType.getSettings()); + if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) { + // if we can fit all our values in a byte we should do this! + if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) { + return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() { + @Override + public byte get(int index) { + return (byte) values.get(index); + } + + @Override + public byte[] toArray() { + byte[] bValues = new byte[values.size()]; + int i = 0; + for (TShortIterator it = values.iterator(); it.hasNext(); ) { + bValues[i++] = (byte) it.next(); + } + return bValues; + } + }); + } + } + return build(reader, fieldDataType, builder, build, new BuilderShorts() { @Override public short get(int index) { diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/IntFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/IntFieldDataTests.java index eadfa4802e9..90728b6d1d5 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/fielddata/IntFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/fielddata/IntFieldDataTests.java @@ -19,11 +19,19 @@ package org.elasticsearch.test.unit.index.fielddata; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.IntField; -import org.apache.lucene.document.StringField; +import org.apache.lucene.document.*; +import org.elasticsearch.index.fielddata.AtomicNumericFieldData; import org.elasticsearch.index.fielddata.FieldDataType; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData; +import org.elasticsearch.index.fielddata.plain.IntArrayAtomicFieldData; +import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData; +import org.elasticsearch.index.mapper.FieldMapper; +import org.testng.annotations.Test; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; /** */ @@ -34,6 +42,63 @@ public class IntFieldDataTests extends NumericFieldDataTests { return new FieldDataType("int"); } + @Test + public void testOptimizeTypeByte() throws Exception { + Document d = new Document(); + d.add(new StringField("_id", "1", Field.Store.NO)); + d.add(new LongField("value", Byte.MAX_VALUE, Field.Store.NO)); + writer.addDocument(d); + + d = new Document(); + d.add(new StringField("_id", "2", Field.Store.NO)); + d.add(new LongField("value", Byte.MIN_VALUE, Field.Store.NO)); + writer.addDocument(d); + + IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long")); + AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader()); + assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class)); + assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE)); + assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE)); + } + + @Test + public void testOptimizeTypeShort() throws Exception { + Document d = new Document(); + d.add(new StringField("_id", "1", Field.Store.NO)); + d.add(new LongField("value", Short.MAX_VALUE, Field.Store.NO)); + writer.addDocument(d); + + d = new Document(); + d.add(new StringField("_id", "2", Field.Store.NO)); + d.add(new LongField("value", Short.MIN_VALUE, Field.Store.NO)); + writer.addDocument(d); + + IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long")); + AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader()); + assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class)); + assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE)); + assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE)); + } + + @Test + public void testOptimizeTypeInteger() throws Exception { + Document d = new Document(); + d.add(new StringField("_id", "1", Field.Store.NO)); + d.add(new LongField("value", Integer.MAX_VALUE, Field.Store.NO)); + writer.addDocument(d); + + d = new Document(); + d.add(new StringField("_id", "2", Field.Store.NO)); + d.add(new LongField("value", Integer.MIN_VALUE, Field.Store.NO)); + writer.addDocument(d); + + IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long")); + AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader()); + assertThat(fieldData, instanceOf(IntArrayAtomicFieldData.class)); + assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE)); + assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE)); + } + @Override protected void fillSingleValueAllSet() throws Exception { Document d = new Document(); diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ShortFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ShortFieldDataTests.java index 6ca9a5a9eca..3cd4dcf93d5 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ShortFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ShortFieldDataTests.java @@ -21,9 +21,19 @@ package org.elasticsearch.test.unit.index.fielddata; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.IntField; +import org.apache.lucene.document.LongField; import org.apache.lucene.document.StringField; +import org.elasticsearch.index.fielddata.AtomicNumericFieldData; import org.elasticsearch.index.fielddata.FieldDataType; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData; +import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData; +import org.elasticsearch.index.mapper.FieldMapper; +import org.testng.annotations.Test; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; /** */ @@ -34,4 +44,42 @@ public class ShortFieldDataTests extends IntFieldDataTests { return new FieldDataType("short"); } + @Test + public void testOptimizeTypeByte() throws Exception { + Document d = new Document(); + d.add(new StringField("_id", "1", Field.Store.NO)); + d.add(new LongField("value", Byte.MAX_VALUE, Field.Store.NO)); + writer.addDocument(d); + + d = new Document(); + d.add(new StringField("_id", "2", Field.Store.NO)); + d.add(new LongField("value", Byte.MIN_VALUE, Field.Store.NO)); + writer.addDocument(d); + + IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long")); + AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader()); + assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class)); + assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE)); + assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE)); + } + + @Test + public void testOptimizeTypeShort() throws Exception { + Document d = new Document(); + d.add(new StringField("_id", "1", Field.Store.NO)); + d.add(new LongField("value", Short.MAX_VALUE, Field.Store.NO)); + writer.addDocument(d); + + d = new Document(); + d.add(new StringField("_id", "2", Field.Store.NO)); + d.add(new LongField("value", Short.MIN_VALUE, Field.Store.NO)); + writer.addDocument(d); + + IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long")); + AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader()); + assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class)); + assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE)); + assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE)); + } + }