Field data: Try to load short values as byte values and load int values as short or byte values to reduce the size they take in memory.
This commit is contained in:
parent 5fbd4a12a0
commit 80dbca0809
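
The change, stripped of the field-data plumbing, is a range check: while the values of a numeric field are collected, track their min and max, and if the whole range fits into a narrower primitive type, back the in-memory field data with a byte[] or short[] instead of an int[]. The sketch below is a minimal, self-contained illustration of that decision only; the class and method names are invented for the example and are not part of the Elasticsearch API.

// Illustrative only: picks the narrowest primitive storage that can hold
// every collected value, mirroring the min/max check in the diff below.
public final class NarrowingSketch {

    enum Storage { BYTE, SHORT, INT }

    static Storage chooseStorage(int[] values) {
        int min = Integer.MAX_VALUE;
        int max = Integer.MIN_VALUE;
        for (int v : values) {              // single pass to find the range
            if (v < min) min = v;
            if (v > max) max = v;
        }
        if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
            return Storage.BYTE;            // 1 byte per value instead of 4
        }
        if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
            return Storage.SHORT;           // 2 bytes per value instead of 4
        }
        return Storage.INT;                 // values need the full int range
    }

    public static void main(String[] args) {
        System.out.println(chooseStorage(new int[]{-3, 7, 100}));      // BYTE
        System.out.println(chooseStorage(new int[]{-3, 7, 1000}));     // SHORT
        System.out.println(chooseStorage(new int[]{-3, 7, 100000}));   // INT
    }
}

In the diff itself the same min/max bookkeeping happens while the prefix-coded terms are iterated, and the narrower storage is produced by delegating to ByteArrayIndexFieldData.build(...) or ShortArrayIndexFieldData.build(...); the behaviour is gated on the field data setting "optimize_type", which defaults to true.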
.../IntArrayIndexFieldData.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.index.fielddata.plain;
 
+import gnu.trove.iterator.TIntIterator;
 import gnu.trove.list.array.TIntArrayList;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
@@ -42,7 +43,7 @@ import org.elasticsearch.index.settings.IndexSettings;
 
 /**
  */
-public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomicFieldData> implements IndexNumericFieldData<IntArrayAtomicFieldData> {
+public class IntArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
 
     public static class Builder implements IndexFieldData.Builder {
 
@@ -69,7 +70,7 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomi
     }
 
     @Override
-    public IntArrayAtomicFieldData load(AtomicReaderContext context) {
+    public AtomicNumericFieldData load(AtomicReaderContext context) {
         try {
             return cache.load(context, this);
         } catch (Throwable e) {
@@ -82,7 +83,7 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomi
     }
 
     @Override
-    public IntArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
+    public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
         AtomicReader reader = context.reader();
         Terms terms = reader.terms(getFieldNames().indexName());
         if (terms == null) {
@@ -94,12 +95,61 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomi
         values.add(0); // first "t" indicates null value
         OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
         try {
-            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
             BytesRef term;
+            int max = Integer.MIN_VALUE;
+            int min = Integer.MAX_VALUE;
+            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
             while ((term = iter.next()) != null) {
-                values.add(NumericUtils.prefixCodedToInt(term));
+                int value = NumericUtils.prefixCodedToInt(term);
+                values.add(value);
+                if (value > max) {
+                    max = value;
+                }
+                if (value < min) {
+                    min = value;
+                }
             }
+
             Ordinals build = builder.build(fieldDataType.getSettings());
+            if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
+                // if we can fit all our values in a byte or short we should do this!
+                if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
+                    return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
+                        @Override
+                        public byte get(int index) {
+                            return (byte) values.get(index);
+                        }
+
+                        @Override
+                        public byte[] toArray() {
+                            byte[] bValues = new byte[values.size()];
+                            int i = 0;
+                            for (TIntIterator it = values.iterator(); it.hasNext(); ) {
+                                bValues[i++] = (byte) it.next();
+                            }
+                            return bValues;
+                        }
+                    });
+                } else if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
+                    return ShortArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ShortArrayIndexFieldData.BuilderShorts() {
+                        @Override
+                        public short get(int index) {
+                            return (short) values.get(index);
+                        }
+
+                        @Override
+                        public short[] toArray() {
+                            short[] sValues = new short[values.size()];
+                            int i = 0;
+                            for (TIntIterator it = values.iterator(); it.hasNext(); ) {
+                                sValues[i++] = (short) it.next();
+                            }
+                            return sValues;
+                        }
+                    });
+                }
+            }
+
             return build(reader, fieldDataType, builder, build, new BuilderIntegers() {
                 @Override
                 public int get(int index) {
.../ShortArrayIndexFieldData.java

@@ -19,6 +19,7 @@
 
 package org.elasticsearch.index.fielddata.plain;
 
+import gnu.trove.iterator.TShortIterator;
 import gnu.trove.list.array.TShortArrayList;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
@@ -42,7 +43,7 @@ import org.elasticsearch.index.settings.IndexSettings;
 
 /**
  */
-public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayAtomicFieldData> implements IndexNumericFieldData<ShortArrayAtomicFieldData> {
+public class ShortArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
 
     public static class Builder implements IndexFieldData.Builder {
 
@@ -69,7 +70,7 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayA
     }
 
     @Override
-    public ShortArrayAtomicFieldData load(AtomicReaderContext context) {
+    public AtomicNumericFieldData load(AtomicReaderContext context) {
         try {
             return cache.load(context, this);
         } catch (Throwable e) {
@@ -82,7 +83,7 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayA
     }
 
     @Override
-    public ShortArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
+    public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
        AtomicReader reader = context.reader();
         Terms terms = reader.terms(getFieldNames().indexName());
         if (terms == null) {
@@ -94,13 +95,44 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<ShortArrayA
         values.add((short) 0); // first "t" indicates null value
         OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
         try {
-            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
             BytesRef term;
+            short max = Short.MIN_VALUE;
+            short min = Short.MAX_VALUE;
+            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
             while ((term = iter.next()) != null) {
-                values.add((short) NumericUtils.prefixCodedToInt(term));
+                short value = (short) NumericUtils.prefixCodedToInt(term);
+                values.add(value);
+                if (value > max) {
+                    max = value;
+                }
+                if (value < min) {
+                    min = value;
+                }
             }
 
             Ordinals build = builder.build(fieldDataType.getSettings());
+            if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
+                // if we can fit all our values in a byte we should do this!
+                if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
+                    return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
+                        @Override
+                        public byte get(int index) {
+                            return (byte) values.get(index);
+                        }
+
+                        @Override
+                        public byte[] toArray() {
+                            byte[] bValues = new byte[values.size()];
+                            int i = 0;
+                            for (TShortIterator it = values.iterator(); it.hasNext(); ) {
+                                bValues[i++] = (byte) it.next();
+                            }
+                            return bValues;
+                        }
+                    });
+                }
+            }
+
             return build(reader, fieldDataType, builder, build, new BuilderShorts() {
                 @Override
                 public short get(int index) {
.../IntFieldDataTests.java

@@ -19,11 +19,19 @@
 
 package org.elasticsearch.test.unit.index.fielddata;
 
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.IntField;
-import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.*;
+import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
 import org.elasticsearch.index.fielddata.FieldDataType;
+import org.elasticsearch.index.fielddata.IndexNumericFieldData;
+import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
+import org.elasticsearch.index.fielddata.plain.IntArrayAtomicFieldData;
+import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.testng.annotations.Test;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
 
 /**
  */
@@ -34,6 +42,63 @@ public class IntFieldDataTests extends NumericFieldDataTests {
         return new FieldDataType("int");
     }
 
+    @Test
+    public void testOptimizeTypeByte() throws Exception {
+        Document d = new Document();
+        d.add(new StringField("_id", "1", Field.Store.NO));
+        d.add(new LongField("value", Byte.MAX_VALUE, Field.Store.NO));
+        writer.addDocument(d);
+
+        d = new Document();
+        d.add(new StringField("_id", "2", Field.Store.NO));
+        d.add(new LongField("value", Byte.MIN_VALUE, Field.Store.NO));
+        writer.addDocument(d);
+
+        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
+        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
+        assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class));
+        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE));
+        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE));
+    }
+
+    @Test
+    public void testOptimizeTypeShort() throws Exception {
+        Document d = new Document();
+        d.add(new StringField("_id", "1", Field.Store.NO));
+        d.add(new LongField("value", Short.MAX_VALUE, Field.Store.NO));
+        writer.addDocument(d);
+
+        d = new Document();
+        d.add(new StringField("_id", "2", Field.Store.NO));
+        d.add(new LongField("value", Short.MIN_VALUE, Field.Store.NO));
+        writer.addDocument(d);
+
+        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
+        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
+        assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class));
+        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE));
+        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE));
+    }
+
+    @Test
+    public void testOptimizeTypeInteger() throws Exception {
+        Document d = new Document();
+        d.add(new StringField("_id", "1", Field.Store.NO));
+        d.add(new LongField("value", Integer.MAX_VALUE, Field.Store.NO));
+        writer.addDocument(d);
+
+        d = new Document();
+        d.add(new StringField("_id", "2", Field.Store.NO));
+        d.add(new LongField("value", Integer.MIN_VALUE, Field.Store.NO));
+        writer.addDocument(d);
+
+        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
+        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
+        assertThat(fieldData, instanceOf(IntArrayAtomicFieldData.class));
+        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE));
+        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE));
+    }
+
     @Override
     protected void fillSingleValueAllSet() throws Exception {
         Document d = new Document();
.../ShortFieldDataTests.java

@@ -21,9 +21,19 @@ package org.elasticsearch.test.unit.index.fielddata;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.LongField;
 import org.apache.lucene.document.StringField;
+import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
 import org.elasticsearch.index.fielddata.FieldDataType;
+import org.elasticsearch.index.fielddata.IndexNumericFieldData;
+import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
+import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.testng.annotations.Test;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
 
 /**
  */
@@ -34,4 +44,42 @@ public class ShortFieldDataTests extends IntFieldDataTests {
         return new FieldDataType("short");
     }
 
+    @Test
+    public void testOptimizeTypeByte() throws Exception {
+        Document d = new Document();
+        d.add(new StringField("_id", "1", Field.Store.NO));
+        d.add(new LongField("value", Byte.MAX_VALUE, Field.Store.NO));
+        writer.addDocument(d);
+
+        d = new Document();
+        d.add(new StringField("_id", "2", Field.Store.NO));
+        d.add(new LongField("value", Byte.MIN_VALUE, Field.Store.NO));
+        writer.addDocument(d);
+
+        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
+        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
+        assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class));
+        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE));
+        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE));
+    }
+
+    @Test
+    public void testOptimizeTypeShort() throws Exception {
+        Document d = new Document();
+        d.add(new StringField("_id", "1", Field.Store.NO));
+        d.add(new LongField("value", Short.MAX_VALUE, Field.Store.NO));
+        writer.addDocument(d);
+
+        d = new Document();
+        d.add(new StringField("_id", "2", Field.Store.NO));
+        d.add(new LongField("value", Short.MIN_VALUE, Field.Store.NO));
+        writer.addDocument(d);
+
+        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
+        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
+        assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class));
+        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE));
+        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE));
+    }
+
 }