Optimization in the fielddata cache: ordinals are used instead of flat arrays when the number of unique values is low

This commit is contained in:
uboness 2013-04-13 12:42:53 -07:00
parent e7b49d8936
commit 20e6df9f34
5 changed files with 87 additions and 5 deletions

View File

@ -29,6 +29,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.*;
@ -103,12 +104,22 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
Ordinals build = builder.build(fieldDataType.getSettings()); Ordinals build = builder.build(fieldDataType.getSettings());
if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) { if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
Docs ordinals = build.ordinals(); Docs ordinals = build.ordinals();
final FixedBitSet set = builder.buildDocsWithValuesSet();
// there's a sweet spot where, due to a low unique value count, using ordinals will consume less memory
long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_DOUBLE + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_DOUBLE;
long ordinalsSize = build.getMemorySizeInBytes();
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
return new DoubleArrayAtomicFieldData.WithOrdinals(values.toArray(new double[values.size()]), reader.maxDoc(), build);
}
double[] sValues = new double[reader.maxDoc()]; double[] sValues = new double[reader.maxDoc()];
int maxDoc = reader.maxDoc(); int maxDoc = reader.maxDoc();
for (int i = 0; i < maxDoc; i++) { for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i)); sValues[i] = values.get(ordinals.getOrd(i));
} }
final FixedBitSet set = builder.buildDocsWithValuesSet();
if (set == null) { if (set == null) {
return new DoubleArrayAtomicFieldData.Single(sValues, reader.maxDoc()); return new DoubleArrayAtomicFieldData.Single(sValues, reader.maxDoc());
} else { } else {

View File

@ -29,6 +29,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.*;
@ -103,12 +104,21 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
Ordinals build = builder.build(fieldDataType.getSettings()); Ordinals build = builder.build(fieldDataType.getSettings());
if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) { if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
Docs ordinals = build.ordinals(); Docs ordinals = build.ordinals();
final FixedBitSet set = builder.buildDocsWithValuesSet();
// there's a sweet spot where, due to a low unique value count, using ordinals will consume less memory
long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_FLOAT + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_FLOAT;
long ordinalsSize = build.getMemorySizeInBytes();
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
return new FloatArrayAtomicFieldData.WithOrdinals(values.toArray(new float[values.size()]), reader.maxDoc(), build);
}
float[] sValues = new float[reader.maxDoc()]; float[] sValues = new float[reader.maxDoc()];
int maxDoc = reader.maxDoc(); int maxDoc = reader.maxDoc();
for (int i = 0; i < maxDoc; i++) { for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i)); sValues[i] = values.get(ordinals.getOrd(i));
} }
final FixedBitSet set = builder.buildDocsWithValuesSet();
if (set == null) { if (set == null) {
return new FloatArrayAtomicFieldData.Single(sValues, reader.maxDoc()); return new FloatArrayAtomicFieldData.Single(sValues, reader.maxDoc());
} else { } else {

View File

@ -30,6 +30,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.*;
@ -146,6 +147,11 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumeric
} }
return sValues; return sValues;
} }
// Returns the element count of the captured `values` collection.
// NOTE(review): presumably this is the number of unique values that the receiving
// build() method multiplies by the per-value byte size — confirm against that method.
@Override
public int size() {
return values.size();
}
}); });
} }
} }
@ -160,6 +166,11 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumeric
public int[] toArray() { public int[] toArray() {
return values.toArray(); return values.toArray();
} }
// Returns the element count of the captured `values` collection.
// build() reads this through BuilderIntegers.size() to estimate the footprint of the
// values array when deciding whether to keep ordinals.
@Override
public int size() {
return values.size();
}
}); });
} finally { } finally {
builder.close(); builder.close();
@ -170,17 +181,28 @@ public class IntArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumeric
int get(int index); int get(int index);
int[] toArray(); int[] toArray();
int size();
} }
static IntArrayAtomicFieldData build(AtomicReader reader, FieldDataType fieldDataType, OrdinalsBuilder builder, Ordinals build, BuilderIntegers values) { static IntArrayAtomicFieldData build(AtomicReader reader, FieldDataType fieldDataType, OrdinalsBuilder builder, Ordinals build, BuilderIntegers values) {
if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) { if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
Docs ordinals = build.ordinals(); Docs ordinals = build.ordinals();
final FixedBitSet set = builder.buildDocsWithValuesSet();
// there's a sweet spot where, due to a low unique value count, using ordinals will consume less memory
long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_INT + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_INT;
long ordinalsSize = build.getMemorySizeInBytes();
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
return new IntArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
}
int[] sValues = new int[reader.maxDoc()]; int[] sValues = new int[reader.maxDoc()];
int maxDoc = reader.maxDoc(); int maxDoc = reader.maxDoc();
for (int i = 0; i < maxDoc; i++) { for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i)); sValues[i] = values.get(ordinals.getOrd(i));
} }
final FixedBitSet set = builder.buildDocsWithValuesSet();
if (set == null) { if (set == null) {
return new IntArrayAtomicFieldData.Single(sValues, reader.maxDoc()); return new IntArrayAtomicFieldData.Single(sValues, reader.maxDoc());
} else { } else {

View File

@ -30,6 +30,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.*;
@ -146,6 +147,11 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumeri
} }
return sValues; return sValues;
} }
// Returns the element count of the captured `values` collection.
// NOTE(review): presumably this is the number of unique values that the receiving
// build() method multiplies by the per-value byte size — confirm against that method.
@Override
public int size() {
return values.size();
}
}); });
} else if (min >= Integer.MIN_VALUE && max <= Integer.MAX_VALUE) { } else if (min >= Integer.MIN_VALUE && max <= Integer.MAX_VALUE) {
return IntArrayIndexFieldData.build(reader, fieldDataType, builder, build, new IntArrayIndexFieldData.BuilderIntegers() { return IntArrayIndexFieldData.build(reader, fieldDataType, builder, build, new IntArrayIndexFieldData.BuilderIntegers() {
@ -163,18 +169,33 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumeri
} }
return iValues; return iValues;
} }
// Returns the element count of the captured `values` collection.
// NOTE(review): this appears to belong to the builder handed to
// IntArrayIndexFieldData.build(...), which would use it to size the values array —
// confirm, since the enclosing anonymous class header is outside this hunk.
@Override
public int size() {
return values.size();
}
}); });
} }
} }
if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) { if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
Docs ordinals = build.ordinals(); Docs ordinals = build.ordinals();
final FixedBitSet set = builder.buildDocsWithValuesSet();
// there's a sweet spot where, due to a low unique value count, using ordinals will consume less memory
long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_LONG + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_LONG;
long ordinalsSize = build.getMemorySizeInBytes();
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
return new LongArrayAtomicFieldData.WithOrdinals(values.toArray(new long[values.size()]), reader.maxDoc(), build);
}
long[] sValues = new long[reader.maxDoc()]; long[] sValues = new long[reader.maxDoc()];
int maxDoc = reader.maxDoc(); int maxDoc = reader.maxDoc();
for (int i = 0; i < maxDoc; i++) { for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i)); sValues[i] = values.get(ordinals.getOrd(i));
} }
final FixedBitSet set = builder.buildDocsWithValuesSet();
if (set == null) { if (set == null) {
return new LongArrayAtomicFieldData.Single(sValues, reader.maxDoc()); return new LongArrayAtomicFieldData.Single(sValues, reader.maxDoc());
} else { } else {

View File

@ -30,6 +30,7 @@ import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.*;
@ -143,6 +144,11 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumer
public short[] toArray() { public short[] toArray() {
return values.toArray(); return values.toArray();
} }
// Returns the element count of the captured `values` collection.
// build() reads this through BuilderShorts.size() to estimate the footprint of the
// values array when deciding whether to keep ordinals.
@Override
public int size() {
return values.size();
}
}); });
} finally { } finally {
builder.close(); builder.close();
@ -153,17 +159,29 @@ public class ShortArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumer
short get(int index); short get(int index);
short[] toArray(); short[] toArray();
int size();
} }
static ShortArrayAtomicFieldData build(AtomicReader reader, FieldDataType fieldDataType, OrdinalsBuilder builder, Ordinals build, BuilderShorts values) { static ShortArrayAtomicFieldData build(AtomicReader reader, FieldDataType fieldDataType, OrdinalsBuilder builder, Ordinals build, BuilderShorts values) {
if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) { if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
Docs ordinals = build.ordinals(); Docs ordinals = build.ordinals();
final FixedBitSet set = builder.buildDocsWithValuesSet();
// there's a sweet spot where, due to a low unique value count, using ordinals will consume less memory
long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_SHORT + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_SHORT;
long ordinalsSize = build.getMemorySizeInBytes();
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
return new ShortArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
}
short[] sValues = new short[reader.maxDoc()]; short[] sValues = new short[reader.maxDoc()];
int maxDoc = reader.maxDoc(); int maxDoc = reader.maxDoc();
for (int i = 0; i < maxDoc; i++) { for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i)); sValues[i] = values.get(ordinals.getOrd(i));
} }
final FixedBitSet set = builder.buildDocsWithValuesSet();
if (set == null) { if (set == null) {
return new ShortArrayAtomicFieldData.Single(sValues, reader.maxDoc()); return new ShortArrayAtomicFieldData.Single(sValues, reader.maxDoc());
} else { } else {