Merge integer field data implementations.

This commit merges field data implementations for byte, short, int and long
data into PackedArrayAtomicFieldData which uses Lucene's PackedInts API to
store data.

Close #3220
This commit is contained in:
Adrien Grand 2013-06-19 17:54:44 +02:00
parent 5a20ba5ff2
commit 2fb5d3ff51
28 changed files with 715 additions and 2307 deletions

View File

@ -50,10 +50,10 @@ public class IndexFieldDataService extends AbstractIndexComponent {
.put("string", new PagedBytesIndexFieldData.Builder())
.put("float", new FloatArrayIndexFieldData.Builder())
.put("double", new DoubleArrayIndexFieldData.Builder())
.put("byte", new ByteArrayIndexFieldData.Builder())
.put("short", new ShortArrayIndexFieldData.Builder())
.put("int", new IntArrayIndexFieldData.Builder())
.put("long", new LongArrayIndexFieldData.Builder())
.put("byte", new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BYTE))
.put("short", new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.SHORT))
.put("int", new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.INT))
.put("long", new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.LONG))
.put("geo_point", new GeoPointDoubleArrayIndexFieldData.Builder())
.immutableMap();
@ -62,10 +62,10 @@ public class IndexFieldDataService extends AbstractIndexComponent {
.put(Tuple.tuple("string", "fst"), new FSTBytesIndexFieldData.Builder())
.put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder())
.put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder())
.put(Tuple.tuple("byte", "array"), new ByteArrayIndexFieldData.Builder())
.put(Tuple.tuple("short", "array"), new ShortArrayIndexFieldData.Builder())
.put(Tuple.tuple("int", "array"), new IntArrayIndexFieldData.Builder())
.put(Tuple.tuple("long", "array"), new LongArrayIndexFieldData.Builder())
.put(Tuple.tuple("byte", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BYTE))
.put(Tuple.tuple("short", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.SHORT))
.put(Tuple.tuple("int", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.INT))
.put(Tuple.tuple("long", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.LONG))
.put(Tuple.tuple("geo_point", "array"), new GeoPointDoubleArrayIndexFieldData.Builder())
.immutableMap();
}

View File

@ -20,6 +20,8 @@
package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.TermsEnum;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
/**
*/
@ -94,6 +96,13 @@ public interface IndexNumericFieldData<FD extends AtomicNumericFieldData> extend
public abstract boolean isFloatingPoint();
public abstract int requiredBits();
/**
 * Wraps the given terms enumeration so that only full precision terms of this numeric type
 * are iterated.
 *
 * @param termsEnum the enumeration to filter
 * @return the 64 bit filter when {@link #requiredBits()} exceeds 32, the 32 bit filter otherwise
 */
public final TermsEnum wrapTermsEnum(TermsEnum termsEnum) {
    return requiredBits() > 32
            ? OrdinalsBuilder.wrapNumeric64Bit(termsEnum)
            : OrdinalsBuilder.wrapNumeric32Bit(termsEnum);
}
}
NumericType getNumericType();

View File

@ -36,7 +36,7 @@ import java.util.Comparator;
/**
* Simple class to build document ID <-> ordinal mapping. Note: Ordinals are
* <tt>1</tt> based monotocially increasing positive integers. <tt>0</tt>
* <tt>1</tt> based monotonically increasing positive integers. <tt>0</tt>
* denotes the missing value in this context.
*/
public final class OrdinalsBuilder implements Closeable {
@ -55,10 +55,8 @@ public final class OrdinalsBuilder implements Closeable {
private int numMultiValuedDocs = 0;
private int totalNumOrds = 0;
public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, Allocator allocator) throws IOException {
public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, Allocator allocator, float acceptableOverheadRatio) throws IOException {
this.maxDoc = maxDoc;
// TODO: Make configurable...
float acceptableOverheadRatio = PackedInts.FAST;
if (preDefineBitsRequired) {
int numTerms = (int) terms.size();
if (numTerms == -1) {
@ -75,15 +73,15 @@ public final class OrdinalsBuilder implements Closeable {
}
public OrdinalsBuilder(int maxDoc) throws IOException {
this(null, false, maxDoc);
this(null, false, maxDoc, PackedInts.DEFAULT);
}
public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc) throws IOException {
this(terms, preDefineBitsRequired, maxDoc, new DirectAllocator());
public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, float acceptableOverheadRatio) throws IOException {
this(terms, preDefineBitsRequired, maxDoc, new DirectAllocator(), acceptableOverheadRatio);
}
public OrdinalsBuilder(Terms terms, int maxDoc) throws IOException {
this(terms, true, maxDoc, new DirectAllocator());
public OrdinalsBuilder(Terms terms, int maxDoc, float acceptableOverheadRatio) throws IOException {
this(terms, true, maxDoc, new DirectAllocator(), acceptableOverheadRatio);
}
/**
@ -296,7 +294,7 @@ public final class OrdinalsBuilder implements Closeable {
* A {@link TermsEnum} that iterates only full precision prefix coded 64 bit values.
* @see #buildFromTerms(TermsEnum, Bits)
*/
public TermsEnum wrapNumeric64Bit(TermsEnum termsEnum) {
public static TermsEnum wrapNumeric64Bit(TermsEnum termsEnum) {
return new FilteredTermsEnum(termsEnum, false) {
@Override
protected AcceptStatus accept(BytesRef term) throws IOException {
@ -310,7 +308,7 @@ public final class OrdinalsBuilder implements Closeable {
* A {@link TermsEnum} that iterates only full precision prefix coded 32 bit values.
* @see #buildFromTerms(TermsEnum, Bits)
*/
public TermsEnum wrapNumeric32Bit(TermsEnum termsEnum) {
public static TermsEnum wrapNumeric32Bit(TermsEnum termsEnum) {
return new FilteredTermsEnum(termsEnum, false) {
@Override

View File

@ -1,329 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
/**
 * Numeric field data whose values are kept in a plain {@code byte[]}.
 * <p>
 * The concrete variants differ in how the array is addressed: {@link WithOrdinals} looks values
 * up by ordinal, while {@link Single} and {@link SingleFixedSet} index the array directly by
 * document id. {@link #EMPTY} exposes no values at all.
 */
public abstract class ByteArrayAtomicFieldData extends AtomicNumericFieldData {

    /** Shared instance that exposes only the empty value views. */
    public static final ByteArrayAtomicFieldData EMPTY = new Empty();

    /** Backing values; addressed by ordinal or by document id depending on the subclass. */
    protected final byte[] values;
    private final int numDocs;
    /** Cached result of {@link #getMemorySizeInBytes()}; {@code -1} means "not computed yet". */
    protected long size = -1;

    /**
     * @param values  backing array ({@code null} for the empty instance)
     * @param numDocs value reported by {@link #getNumDocs()}
     */
    public ByteArrayAtomicFieldData(byte[] values, int numDocs) {
        // NOTE(review): the meaning of the boolean is defined by AtomicNumericFieldData
        // (presumably "is floating point") -- confirm against the superclass.
        super(false);
        this.numDocs = numDocs;
        this.values = values;
    }

    /** Nothing to release. */
    @Override
    public void close() {
    }

    @Override
    public int getNumDocs() {
        return numDocs;
    }

    /**
     * Variant without any values: every accessor returns the shared empty view.
     */
    static class Empty extends ByteArrayAtomicFieldData {

        Empty() {
            super(null, 0);
        }

        @Override
        public boolean isMultiValued() {
            return false;
        }

        @Override
        public boolean isValuesOrdered() {
            return false;
        }

        @Override
        public long getMemorySizeInBytes() {
            return 0;
        }

        @Override
        public LongValues getLongValues() {
            return LongValues.EMPTY;
        }

        @Override
        public DoubleValues getDoubleValues() {
            return DoubleValues.EMPTY;
        }

        @Override
        public BytesValues getBytesValues() {
            return BytesValues.EMPTY;
        }

        @Override
        public ScriptDocValues getScriptValues() {
            return ScriptDocValues.EMPTY;
        }
    }

    /**
     * Variant where the value array is addressed by ordinal and an {@link Ordinals} instance
     * maps documents to ordinals.
     */
    public static class WithOrdinals extends ByteArrayAtomicFieldData {

        private final Ordinals ordinals;

        public WithOrdinals(byte[] values, int numDocs, Ordinals ordinals) {
            super(values, numDocs);
            this.ordinals = ordinals;
        }

        @Override
        public boolean isMultiValued() {
            return ordinals.isMultiValued();
        }

        @Override
        public boolean isValuesOrdered() {
            return true;
        }

        /** Computes the estimate on first use and returns the cached figure afterwards. */
        @Override
        public long getMemorySizeInBytes() {
            if (size != -1) {
                return size;
            }
            size = RamUsage.NUM_BYTES_INT /* size */
                    + RamUsage.NUM_BYTES_INT /* numDocs */
                    + RamUsage.NUM_BYTES_ARRAY_HEADER
                    + (values.length)
                    + ordinals.getMemorySizeInBytes();
            return size;
        }

        @Override
        public LongValues getLongValues() {
            return new LongValues(values, ordinals.ordinals());
        }

        @Override
        public DoubleValues getDoubleValues() {
            return new DoubleValues(values, ordinals.ordinals());
        }

        /** Ordinal-addressed view that widens each byte to {@code long}. */
        static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {

            private final byte[] byOrd;

            LongValues(byte[] byOrd, Ordinals.Docs docs) {
                super(docs);
                this.byOrd = byOrd;
            }

            @Override
            public long getValueByOrd(int ord) {
                return byOrd[ord];
            }
        }

        /** Ordinal-addressed view that widens each byte to {@code double}. */
        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {

            private final byte[] byOrd;

            DoubleValues(byte[] byOrd, Ordinals.Docs docs) {
                super(docs);
                this.byOrd = byOrd;
            }

            @Override
            public double getValueByOrd(int ord) {
                return byOrd[ord];
            }
        }
    }

    /**
     * A single valued case, where not all values are "set", so a FixedBitSet records which
     * documents have an actual value. The value array is indexed by document id.
     */
    public static class SingleFixedSet extends ByteArrayAtomicFieldData {

        private final FixedBitSet set;

        public SingleFixedSet(byte[] values, int numDocs, FixedBitSet set) {
            super(values, numDocs);
            this.set = set;
        }

        @Override
        public boolean isMultiValued() {
            return false;
        }

        @Override
        public boolean isValuesOrdered() {
            return false;
        }

        /** Computes the estimate on first use and returns the cached figure afterwards. */
        @Override
        public long getMemorySizeInBytes() {
            if (size != -1) {
                return size;
            }
            size = RamUsage.NUM_BYTES_ARRAY_HEADER
                    + (values.length)
                    + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
            return size;
        }

        @Override
        public LongValues getLongValues() {
            return new LongValues(values, set);
        }

        @Override
        public DoubleValues getDoubleValues() {
            return new DoubleValues(values, set);
        }

        /** Document-addressed {@code long} view; a document has a value iff its bit is set. */
        static class LongValues extends org.elasticsearch.index.fielddata.LongValues {

            private final byte[] byDoc;
            private final FixedBitSet docsWithValue;

            LongValues(byte[] byDoc, FixedBitSet docsWithValue) {
                super(false);
                this.byDoc = byDoc;
                this.docsWithValue = docsWithValue;
            }

            @Override
            public boolean hasValue(int docId) {
                return docsWithValue.get(docId);
            }

            @Override
            public long getValue(int docId) {
                return byDoc[docId];
            }
        }

        /** Document-addressed {@code double} view; a document has a value iff its bit is set. */
        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {

            private final byte[] byDoc;
            private final FixedBitSet docsWithValue;

            DoubleValues(byte[] byDoc, FixedBitSet docsWithValue) {
                super(false);
                this.byDoc = byDoc;
                this.docsWithValue = docsWithValue;
            }

            @Override
            public boolean hasValue(int docId) {
                return docsWithValue.get(docId);
            }

            @Override
            public double getValue(int docId) {
                return byDoc[docId];
            }
        }
    }

    /**
     * Assumes all the values are "set", and docId is used as the index to the value array.
     */
    public static class Single extends ByteArrayAtomicFieldData {

        /**
         * Note, here, we assume that there is no offset by 1 from docId, so position 0
         * is the value for docId 0.
         */
        public Single(byte[] values, int numDocs) {
            super(values, numDocs);
        }

        @Override
        public boolean isMultiValued() {
            return false;
        }

        @Override
        public boolean isValuesOrdered() {
            return false;
        }

        /** Computes the estimate on first use and returns the cached figure afterwards. */
        @Override
        public long getMemorySizeInBytes() {
            if (size != -1) {
                return size;
            }
            size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length);
            return size;
        }

        @Override
        public LongValues getLongValues() {
            return new LongValues(values);
        }

        @Override
        public DoubleValues getDoubleValues() {
            return new DoubleValues(values);
        }

        /** Dense document-addressed view that widens each byte to {@code long}. */
        static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {

            private final byte[] byDoc;

            LongValues(byte[] byDoc) {
                super(false);
                this.byDoc = byDoc;
            }

            @Override
            public long getValue(int docId) {
                return byDoc[docId];
            }
        }

        /** Dense document-addressed view that widens each byte to {@code double}. */
        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {

            private final byte[] byDoc;

            DoubleValues(byte[] byDoc) {
                super(false);
                this.byDoc = byDoc;
            }

            @Override
            public double getValue(int docId) {
                return byDoc[docId];
            }
        }
    }
}

View File

@ -1,148 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import gnu.trove.list.array.TByteArrayList;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.ByteValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.settings.IndexSettings;
/**
 * {@link IndexNumericFieldData} implementation for {@link NumericType#BYTE} fields that
 * un-inverts the field's terms into a {@code byte[]} backed {@link ByteArrayAtomicFieldData}.
 */
public class ByteArrayIndexFieldData extends AbstractIndexFieldData<ByteArrayAtomicFieldData> implements IndexNumericFieldData<ByteArrayAtomicFieldData> {

    /** Factory creating {@link ByteArrayIndexFieldData} instances. */
    public static class Builder implements IndexFieldData.Builder {

        @Override
        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
            return new ByteArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
        }
    }

    /** Hands every argument unchanged to {@link AbstractIndexFieldData}. */
    public ByteArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
        super(index, indexSettings, fieldNames, fieldDataType, cache);
    }

    @Override
    public NumericType getNumericType() {
        return NumericType.BYTE;
    }

    @Override
    public boolean valuesOrdered() {
        // because we might have single values? we can dynamically update a flag to reflect that
        // based on the atomic field data loaded
        return false;
    }

    /**
     * Loads the atomic field data for the given reader context through the cache.
     *
     * @throws ElasticSearchException if loading fails; failures of any other type are wrapped
     *                                with the original throwable kept as the cause
     */
    @Override
    public ByteArrayAtomicFieldData load(AtomicReaderContext context) {
        try {
            return cache.load(context, this);
        } catch (Throwable e) {
            if (e instanceof ElasticSearchException) {
                throw (ElasticSearchException) e;
            } else {
                throw new ElasticSearchException(e.getMessage(), e);
            }
        }
    }

    /**
     * Builds the atomic field data straight from the reader's terms, bypassing the cache.
     *
     * @return {@link ByteArrayAtomicFieldData#EMPTY} when the reader has no terms for the field,
     *         otherwise the variant chosen by {@link #build}
     * @throws Exception if reading the terms or building the ordinals fails
     */
    @Override
    public ByteArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
        AtomicReader reader = context.reader();
        Terms terms = reader.terms(getFieldNames().indexName());
        if (terms == null) {
            return ByteArrayAtomicFieldData.EMPTY;
        }
        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
        final TByteArrayList values = new TByteArrayList();
        // ordinals are 1-based and ordinal 0 stands for "no value", so slot 0 is a placeholder
        values.add((byte) 0);
        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
        // The try starts right after the builder is created so that a failure while iterating
        // the terms still closes it.
        try {
            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
            BytesRef term;
            while ((term = iter.next()) != null) {
                values.add((byte) NumericUtils.prefixCodedToInt(term));
            }
            Ordinals build = builder.build(fieldDataType.getSettings());
            return build(reader, fieldDataType, builder, build, new BuilderBytes() {
                @Override
                public byte get(int index) {
                    return values.get(index);
                }

                @Override
                public byte[] toArray() {
                    return values.toArray();
                }
            });
        } finally {
            builder.close();
        }
    }

    /** Read access to the per-ordinal values collected while loading. */
    static interface BuilderBytes {

        /** @return the value stored for the given ordinal */
        byte get(int index);

        /** @return all values as an array addressed by ordinal */
        byte[] toArray();
    }

    /**
     * Picks the most compact representation for the loaded values.
     * <p>
     * When the ordinals are single valued and {@code CommonSettings.removeOrdsOnSingleValue}
     * allows it, the ordinals are resolved into an array indexed by document id: a
     * {@link ByteArrayAtomicFieldData.Single} if the builder reports no docs-with-values set
     * ({@code null}), a {@link ByteArrayAtomicFieldData.SingleFixedSet} otherwise. In every
     * other case the ordinals are kept ({@link ByteArrayAtomicFieldData.WithOrdinals}).
     */
    static ByteArrayAtomicFieldData build(AtomicReader reader, FieldDataType fieldDataType, OrdinalsBuilder builder, Ordinals build, BuilderBytes values) {
        final int maxDoc = reader.maxDoc();
        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
            Docs ordinals = build.ordinals();
            byte[] sValues = new byte[maxDoc];
            for (int i = 0; i < maxDoc; i++) {
                sValues[i] = values.get(ordinals.getOrd(i));
            }
            final FixedBitSet set = builder.buildDocsWithValuesSet();
            if (set == null) {
                return new ByteArrayAtomicFieldData.Single(sValues, maxDoc);
            } else {
                return new ByteArrayAtomicFieldData.SingleFixedSet(sValues, maxDoc, set);
            }
        } else {
            return new ByteArrayAtomicFieldData.WithOrdinals(values.toArray(), maxDoc, build);
        }
    }

    @Override
    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
        return new ByteValuesComparatorSource(this, missingValue, sortMode);
    }
}

View File

@ -27,6 +27,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
@ -94,9 +95,10 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
final TDoubleArrayList values = new TDoubleArrayList();
values.add(0); // first "t" indicates null value
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
try {
final BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric64Bit(terms.iterator(null)), reader.getLiveDocs());
final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
BytesRef term;
while ((term = iter.next()) != null) {
values.add(NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term)));

View File

@ -26,6 +26,7 @@ import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.FieldDataType;
@ -65,7 +66,8 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
final IntsRef scratch = new IntsRef();
boolean preDefineBitsRequired = regex == null && frequency == null;
OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc());
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio);
try {
// we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support

View File

@ -27,6 +27,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
@ -94,9 +95,10 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
values.add(0); // first "t" indicates null value
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
try {
BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
BytesRef term;
while ((term = iter.next()) != null) {
values.add(NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term)));

View File

@ -24,6 +24,7 @@ import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.*;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Nullable;
@ -86,7 +87,8 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
final TDoubleArrayList lon = new TDoubleArrayList();
lat.add(0); // first "t" indicates null value
lon.add(0); // first "t" indicates null value
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
final CharsRef spare = new CharsRef();
try {
BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null), reader.getLiveDocs());

View File

@ -1,326 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
/**
 * Numeric field data whose values are kept in a plain {@code int[]}.
 * <p>
 * The concrete variants differ in how the array is addressed: {@link WithOrdinals} looks values
 * up by ordinal, while {@link Single} and {@link SingleFixedSet} index the array directly by
 * document id. {@link #EMPTY} exposes no values at all.
 */
public abstract class IntArrayAtomicFieldData extends AtomicNumericFieldData {

    /** Shared instance that exposes only the empty value views. */
    public static final IntArrayAtomicFieldData EMPTY = new Empty();

    /** Backing values; addressed by ordinal or by document id depending on the subclass. */
    protected final int[] values;
    private final int numDocs;
    /** Cached result of {@link #getMemorySizeInBytes()}; {@code -1} means "not computed yet". */
    protected long size = -1;

    /**
     * @param values  backing array ({@code null} for the empty instance)
     * @param numDocs value reported by {@link #getNumDocs()}
     */
    public IntArrayAtomicFieldData(int[] values, int numDocs) {
        // NOTE(review): the meaning of the boolean is defined by AtomicNumericFieldData
        // (presumably "is floating point") -- confirm against the superclass.
        super(false);
        this.numDocs = numDocs;
        this.values = values;
    }

    @Override
    public int getNumDocs() {
        return numDocs;
    }

    /** Nothing to release. */
    @Override
    public void close() {
    }

    /**
     * Variant without any values: every accessor returns the shared empty view.
     */
    static class Empty extends IntArrayAtomicFieldData {

        Empty() {
            super(null, 0);
        }

        @Override
        public boolean isMultiValued() {
            return false;
        }

        @Override
        public boolean isValuesOrdered() {
            return false;
        }

        @Override
        public long getMemorySizeInBytes() {
            return 0;
        }

        @Override
        public LongValues getLongValues() {
            return LongValues.EMPTY;
        }

        @Override
        public DoubleValues getDoubleValues() {
            return DoubleValues.EMPTY;
        }

        @Override
        public BytesValues getBytesValues() {
            return BytesValues.EMPTY;
        }

        @Override
        public ScriptDocValues getScriptValues() {
            return ScriptDocValues.EMPTY;
        }
    }

    /**
     * Variant where the value array is addressed by ordinal and an {@link Ordinals} instance
     * maps documents to ordinals.
     */
    public static class WithOrdinals extends IntArrayAtomicFieldData {

        private final Ordinals ordinals;

        public WithOrdinals(int[] values, int numDocs, Ordinals ordinals) {
            super(values, numDocs);
            this.ordinals = ordinals;
        }

        @Override
        public boolean isMultiValued() {
            return ordinals.isMultiValued();
        }

        @Override
        public boolean isValuesOrdered() {
            return true;
        }

        /** Computes the estimate on first use and returns the cached figure afterwards. */
        @Override
        public long getMemorySizeInBytes() {
            if (size != -1) {
                return size;
            }
            size = RamUsage.NUM_BYTES_INT /* size */
                    + RamUsage.NUM_BYTES_INT /* numDocs */
                    + RamUsage.NUM_BYTES_ARRAY_HEADER
                    + (values.length * RamUsage.NUM_BYTES_INT)
                    + ordinals.getMemorySizeInBytes();
            return size;
        }

        @Override
        public LongValues getLongValues() {
            return new LongValues(values, ordinals.ordinals());
        }

        @Override
        public DoubleValues getDoubleValues() {
            return new DoubleValues(values, ordinals.ordinals());
        }

        /** Ordinal-addressed view that widens each int to {@code long}. */
        static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {

            private final int[] byOrd;

            LongValues(int[] byOrd, Ordinals.Docs docs) {
                super(docs);
                this.byOrd = byOrd;
            }

            @Override
            public long getValueByOrd(int ord) {
                return byOrd[ord];
            }
        }

        /** Ordinal-addressed view that widens each int to {@code double}. */
        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {

            private final int[] byOrd;

            DoubleValues(int[] byOrd, Ordinals.Docs docs) {
                super(docs);
                this.byOrd = byOrd;
            }

            @Override
            public double getValueByOrd(int ord) {
                return byOrd[ord];
            }
        }
    }

    /**
     * A single valued case, where not all values are "set", so a FixedBitSet records which
     * documents have an actual value. The value array is indexed by document id.
     */
    public static class SingleFixedSet extends IntArrayAtomicFieldData {

        private final FixedBitSet set;

        public SingleFixedSet(int[] values, int numDocs, FixedBitSet set) {
            super(values, numDocs);
            this.set = set;
        }

        @Override
        public boolean isMultiValued() {
            return false;
        }

        @Override
        public boolean isValuesOrdered() {
            return false;
        }

        /** Computes the estimate on first use and returns the cached figure afterwards. */
        @Override
        public long getMemorySizeInBytes() {
            if (size != -1) {
                return size;
            }
            size = RamUsage.NUM_BYTES_ARRAY_HEADER
                    + (values.length * RamUsage.NUM_BYTES_INT)
                    + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
            return size;
        }

        @Override
        public LongValues getLongValues() {
            return new LongValues(values, set);
        }

        @Override
        public DoubleValues getDoubleValues() {
            return new DoubleValues(values, set);
        }

        /** Document-addressed {@code long} view; a document has a value iff its bit is set. */
        static class LongValues extends org.elasticsearch.index.fielddata.LongValues {

            private final int[] byDoc;
            private final FixedBitSet docsWithValue;

            LongValues(int[] byDoc, FixedBitSet docsWithValue) {
                super(false);
                this.byDoc = byDoc;
                this.docsWithValue = docsWithValue;
            }

            @Override
            public boolean hasValue(int docId) {
                return docsWithValue.get(docId);
            }

            @Override
            public long getValue(int docId) {
                return byDoc[docId];
            }
        }

        /** Document-addressed {@code double} view; a document has a value iff its bit is set. */
        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {

            private final int[] byDoc;
            private final FixedBitSet docsWithValue;

            DoubleValues(int[] byDoc, FixedBitSet docsWithValue) {
                super(false);
                this.byDoc = byDoc;
                this.docsWithValue = docsWithValue;
            }

            @Override
            public boolean hasValue(int docId) {
                return docsWithValue.get(docId);
            }

            @Override
            public double getValue(int docId) {
                return byDoc[docId];
            }
        }
    }

    /**
     * Assumes all the values are "set", and docId is used as the index to the value array.
     */
    public static class Single extends IntArrayAtomicFieldData {

        /**
         * Note, here, we assume that there is no offset by 1 from docId, so position 0
         * is the value for docId 0.
         */
        public Single(int[] values, int numDocs) {
            super(values, numDocs);
        }

        @Override
        public boolean isMultiValued() {
            return false;
        }

        @Override
        public boolean isValuesOrdered() {
            return false;
        }

        /** Computes the estimate on first use and returns the cached figure afterwards. */
        @Override
        public long getMemorySizeInBytes() {
            if (size != -1) {
                return size;
            }
            size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_INT);
            return size;
        }

        @Override
        public LongValues getLongValues() {
            return new LongValues(values);
        }

        @Override
        public DoubleValues getDoubleValues() {
            return new DoubleValues(values);
        }

        /** Dense document-addressed view that widens each int to {@code long}. */
        static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {

            private final int[] byDoc;

            LongValues(int[] byDoc) {
                super(false);
                // an empty array is not expected here
                assert byDoc.length != 0;
                this.byDoc = byDoc;
            }

            @Override
            public long getValue(int docId) {
                return byDoc[docId];
            }
        }

        /** Dense document-addressed view that widens each int to {@code double}. */
        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {

            private final int[] byDoc;

            DoubleValues(int[] byDoc) {
                super(false);
                this.byDoc = byDoc;
            }

            @Override
            public double getValue(int docId) {
                return byDoc[docId];
            }
        }
    }
}

View File

@ -1,223 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import gnu.trove.iterator.TIntIterator;
import gnu.trove.list.array.TIntArrayList;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.IntValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.settings.IndexSettings;
/**
*/
public class IntArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
/**
 * Factory that creates {@link IntArrayIndexFieldData} instances; all arguments of
 * {@code build} are passed straight to the constructor.
 */
public static class Builder implements IndexFieldData.Builder {
@Override
public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
return new IntArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
}
}
/**
 * Creates the field data for one field; every argument is handed unchanged to
 * {@link AbstractIndexFieldData}.
 */
public IntArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
super(index, indexSettings, fieldNames, fieldDataType, cache);
}
/**
 * @return always {@link NumericType#INT}
 */
@Override
public NumericType getNumericType() {
return NumericType.INT;
}
/**
 * Always reports {@code false}, independent of which atomic field data has been loaded.
 */
@Override
public boolean valuesOrdered() {
// False because some segments may have been loaded in a single-valued form. A flag
// could instead be updated dynamically from the atomic field data actually loaded.
return false;
}
/**
 * Loads the atomic field data for the given reader context through the cache.
 * An {@link ElasticSearchException} is propagated untouched; any other throwable is wrapped
 * in one, keeping its message and the throwable itself as the cause.
 */
@Override
public AtomicNumericFieldData load(AtomicReaderContext context) {
    try {
        return cache.load(context, this);
    } catch (ElasticSearchException alreadyTranslated) {
        throw alreadyTranslated;
    } catch (Throwable failure) {
        throw new ElasticSearchException(failure.getMessage(), failure);
    }
}
@Override
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
AtomicReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
if (terms == null) {
return IntArrayAtomicFieldData.EMPTY;
}
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
final TIntArrayList values = new TIntArrayList();
values.add(0); // first "t" indicates null value
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
try {
BytesRef term;
int max = Integer.MIN_VALUE;
int min = Integer.MAX_VALUE;
BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
while ((term = iter.next()) != null) {
int value = NumericUtils.prefixCodedToInt(term);
values.add(value);
if (value > max) {
max = value;
}
if (value < min) {
min = value;
}
}
Ordinals build = builder.build(fieldDataType.getSettings());
if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
// if we can fit all our values in a byte or short we should do this!
if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
@Override
public byte get(int index) {
return (byte) values.get(index);
}
@Override
public byte[] toArray() {
byte[] bValues = new byte[values.size()];
int i = 0;
for (TIntIterator it = values.iterator(); it.hasNext(); ) {
bValues[i++] = (byte) it.next();
}
return bValues;
}
});
} else if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
return ShortArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ShortArrayIndexFieldData.BuilderShorts() {
@Override
public short get(int index) {
return (short) values.get(index);
}
@Override
public short[] toArray() {
short[] sValues = new short[values.size()];
int i = 0;
for (TIntIterator it = values.iterator(); it.hasNext(); ) {
sValues[i++] = (short) it.next();
}
return sValues;
}
@Override
public int size() {
return values.size();
}
});
}
}
return build(reader, fieldDataType, builder, build, new BuilderIntegers() {
@Override
public int get(int index) {
return values.get(index);
}
@Override
public int[] toArray() {
return values.toArray();
}
@Override
public int size() {
return values.size();
}
});
} finally {
builder.close();
}
}
static interface BuilderIntegers {
int get(int index);
int[] toArray();
int size();
}
static IntArrayAtomicFieldData build(AtomicReader reader, FieldDataType fieldDataType, OrdinalsBuilder builder, Ordinals build, BuilderIntegers values) {
if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
Docs ordinals = build.ordinals();
final FixedBitSet set = builder.buildDocsWithValuesSet();
// there's sweatspot where due to low unique value count, using ordinals will consume less memory
long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_INT + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_INT;
long ordinalsSize = build.getMemorySizeInBytes();
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
return new IntArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
}
int[] sValues = new int[reader.maxDoc()];
int maxDoc = reader.maxDoc();
for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i));
}
if (set == null) {
return new IntArrayAtomicFieldData.Single(sValues, reader.maxDoc());
} else {
return new IntArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
}
} else {
return new IntArrayAtomicFieldData.WithOrdinals(
values.toArray(),
reader.maxDoc(),
build);
}
}
@Override
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
return new IntValuesComparatorSource(this, missingValue, sortMode);
}
}

View File

@ -1,217 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import gnu.trove.iterator.TLongIterator;
import gnu.trove.list.array.TLongArrayList;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.settings.IndexSettings;
/**
 * {@link IndexNumericFieldData} implementation for fields whose terms are prefix-coded
 * 64-bit integers.
 * <p>
 * Loading a segment collects the decoded term values together with the {@link Ordinals}
 * that map documents to them. Unless the {@code optimize_type} field data setting is
 * {@code false}, the narrowest array type able to hold the observed value range
 * (byte, short, int or long) backs the resulting {@link AtomicNumericFieldData}.
 */
public class LongArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {

    /**
     * Builder producing {@link LongArrayIndexFieldData} instances.
     */
    public static class Builder implements IndexFieldData.Builder {

        @Override
        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
            return new LongArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
        }
    }

    public LongArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
        super(index, indexSettings, fieldNames, fieldDataType, cache);
    }

    @Override
    public NumericType getNumericType() {
        return NumericType.LONG;
    }

    @Override
    public boolean valuesOrdered() {
        // because we might have single values? we can dynamically update a flag to reflect that
        // based on the atomic field data loaded
        return false;
    }

    /**
     * Loads the field data of the given segment through the cache.
     *
     * @throws ElasticSearchException if loading fails; any other throwable raised by the
     *                                cache is wrapped (with its cause preserved)
     */
    @Override
    public AtomicNumericFieldData load(AtomicReaderContext context) {
        try {
            return cache.load(context, this);
        } catch (Throwable e) {
            if (e instanceof ElasticSearchException) {
                throw (ElasticSearchException) e;
            } else {
                throw new ElasticSearchException(e.getMessage(), e);
            }
        }
    }

    /**
     * Builds the field data of the given segment directly from its terms, bypassing the cache.
     * <p>
     * Returns {@link LongArrayAtomicFieldData#EMPTY} when the segment has no terms for the field.
     * Otherwise every term is decoded into a list of values while the ordinals are built and
     * the minimum and maximum are tracked, so that a byte, short or int representation can be
     * used when all values fit (subject to the {@code optimize_type} setting, default {@code true}).
     * When no narrower type applies, the long layout is chosen by comparing the estimated size
     * of a per-document array with the size of the unique values plus the ordinals.
     */
    @Override
    public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
        AtomicReader reader = context.reader();
        Terms terms = reader.terms(getFieldNames().indexName());
        if (terms == null) {
            return LongArrayAtomicFieldData.EMPTY;
        }
        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
        final TLongArrayList values = new TLongArrayList();
        values.add(0); // slot 0 is a placeholder standing for the "no value" entry
        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
        try {
            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric64Bit(terms.iterator(null)), reader.getLiveDocs());
            BytesRef term;
            long max = Long.MIN_VALUE;
            long min = Long.MAX_VALUE;
            while ((term = iter.next()) != null) {
                long value = NumericUtils.prefixCodedToLong(term);
                values.add(value);
                if (value > max) {
                    max = value;
                }
                if (value < min) {
                    min = value;
                }
            }
            Ordinals build = builder.build(fieldDataType.getSettings());
            if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
                // since the default mapping for numeric is long, its worth optimizing the actual type used to represent the data
                if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
                    return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
                        @Override
                        public byte get(int index) {
                            return (byte) values.get(index);
                        }

                        @Override
                        public byte[] toArray() {
                            byte[] bValues = new byte[values.size()];
                            int i = 0;
                            for (TLongIterator it = values.iterator(); it.hasNext(); ) {
                                bValues[i++] = (byte) it.next();
                            }
                            return bValues;
                        }
                    });
                } else if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
                    return ShortArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ShortArrayIndexFieldData.BuilderShorts() {
                        @Override
                        public short get(int index) {
                            return (short) values.get(index);
                        }

                        @Override
                        public short[] toArray() {
                            short[] sValues = new short[values.size()];
                            int i = 0;
                            for (TLongIterator it = values.iterator(); it.hasNext(); ) {
                                sValues[i++] = (short) it.next();
                            }
                            return sValues;
                        }

                        @Override
                        public int size() {
                            return values.size();
                        }
                    });
                } else if (min >= Integer.MIN_VALUE && max <= Integer.MAX_VALUE) {
                    return IntArrayIndexFieldData.build(reader, fieldDataType, builder, build, new IntArrayIndexFieldData.BuilderIntegers() {
                        @Override
                        public int get(int index) {
                            return (int) values.get(index);
                        }

                        @Override
                        public int[] toArray() {
                            int[] iValues = new int[values.size()];
                            int i = 0;
                            for (TLongIterator it = values.iterator(); it.hasNext(); ) {
                                iValues[i++] = (int) it.next();
                            }
                            return iValues;
                        }

                        @Override
                        public int size() {
                            return values.size();
                        }
                    });
                }
            }
            final int maxDoc = reader.maxDoc();
            if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
                Docs ordinals = build.ordinals();
                final FixedBitSet set = builder.buildDocsWithValuesSet();
                // there's a sweet spot where, due to a low unique value count, using ordinals will consume less memory.
                // The estimates are computed in long arithmetic: multiplying two ints first could overflow
                // for very large segments and silently select the wrong layout.
                long singleValuesArraySize = (long) maxDoc * RamUsage.NUM_BYTES_LONG;
                if (set != null) {
                    singleValuesArraySize += (long) set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT;
                }
                long uniqueValuesArraySize = (long) values.size() * RamUsage.NUM_BYTES_LONG;
                long ordinalsSize = build.getMemorySizeInBytes();
                if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
                    return new LongArrayAtomicFieldData.WithOrdinals(values.toArray(new long[values.size()]), maxDoc, build);
                }
                // resolve each document's ordinal once so lookups become a plain array access
                long[] sValues = new long[maxDoc];
                for (int i = 0; i < maxDoc; i++) {
                    sValues[i] = values.get(ordinals.getOrd(i));
                }
                if (set == null) {
                    return new LongArrayAtomicFieldData.Single(sValues, maxDoc);
                } else {
                    return new LongArrayAtomicFieldData.SingleFixedSet(sValues, maxDoc, set);
                }
            } else {
                return new LongArrayAtomicFieldData.WithOrdinals(values.toArray(), maxDoc, build);
            }
        } finally {
            builder.close();
        }
    }

    @Override
    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
        return new LongValuesComparatorSource(this, missingValue, sortMode);
    }
}

View File

@ -19,25 +19,25 @@
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
/**
* {@link AtomicNumericFieldData} implementation which stores data in packed arrays to save memory.
*/
public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
public abstract class PackedArrayAtomicFieldData extends AtomicNumericFieldData {
public static final LongArrayAtomicFieldData EMPTY = new Empty();
public static final PackedArrayAtomicFieldData EMPTY = new Empty();
protected final long[] values;
private final int numDocs;
protected long size = -1;
public LongArrayAtomicFieldData(long[] values, int numDocs) {
public PackedArrayAtomicFieldData(int numDocs) {
super(false);
this.values = values;
this.numDocs = numDocs;
}
@ -50,10 +50,10 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
return numDocs;
}
static class Empty extends LongArrayAtomicFieldData {
static class Empty extends PackedArrayAtomicFieldData {
Empty() {
super(null, 0);
super(0);
}
@Override
@ -92,12 +92,14 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
}
}
public static class WithOrdinals extends LongArrayAtomicFieldData {
public static class WithOrdinals extends PackedArrayAtomicFieldData {
private final MonotonicAppendingLongBuffer values;
private final Ordinals ordinals;
public WithOrdinals(long[] values, int numDocs, Ordinals ordinals) {
super(values, numDocs);
public WithOrdinals(MonotonicAppendingLongBuffer values, int numDocs, Ordinals ordinals) {
super(numDocs);
this.values = values;
this.ordinals = ordinals;
}
@ -114,7 +116,7 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + +RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_LONG) + ordinals.getMemorySizeInBytes();
size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + values.ramBytesUsed() + ordinals.getMemorySizeInBytes();
}
return size;
}
@ -131,31 +133,31 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {
private final long[] values;
private final MonotonicAppendingLongBuffer values;
LongValues(long[] values, Ordinals.Docs ordinals) {
LongValues(MonotonicAppendingLongBuffer values, Ordinals.Docs ordinals) {
super(ordinals);
this.values = values;
}
@Override
public long getValueByOrd(int ord) {
return values[ord];
return ord == 0 ? 0L : values.get(ord - 1);
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {
private final long[] values;
private final MonotonicAppendingLongBuffer values;
DoubleValues(long[] values, Ordinals.Docs ordinals) {
DoubleValues(MonotonicAppendingLongBuffer values, Ordinals.Docs ordinals) {
super(ordinals);
this.values = values;
}
@Override
public double getValueByOrd(int ord) {
return values[ord];
return ord == 0 ? 0L : values.get(ord - 1);
}
@ -163,16 +165,20 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
}
/**
* A single valued case, where not all values are "set", so we have a FixedBitSet that
* indicates which values have an actual value.
* A single valued case, where not all values are "set", so we have a special
* value which encodes the fact that the document has no value.
*/
public static class SingleFixedSet extends LongArrayAtomicFieldData {
public static class SingleSparse extends PackedArrayAtomicFieldData {
private final FixedBitSet set;
private final PackedInts.Mutable values;
private final long minValue;
private final long missingValue;
public SingleFixedSet(long[] values, int numDocs, FixedBitSet set) {
super(values, numDocs);
this.set = set;
public SingleSparse(PackedInts.Mutable values, long minValue, int numDocs, long missingValue) {
super(numDocs);
this.values = values;
this.minValue = minValue;
this.missingValue = missingValue;
}
@Override
@ -188,63 +194,69 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
size = values.ramBytesUsed() + 2 * RamUsage.NUM_BYTES_LONG;
}
return size;
}
@Override
public LongValues getLongValues() {
return new LongValues(values, set);
return new LongValues(values, minValue, missingValue);
}
@Override
public DoubleValues getDoubleValues() {
return new DoubleValues(values, set);
return new DoubleValues(values, minValue, missingValue);
}
static class LongValues extends org.elasticsearch.index.fielddata.LongValues {
private final long[] values;
private final FixedBitSet set;
private final PackedInts.Mutable values;
private final long minValue;
private final long missingValue;
LongValues(long[] values, FixedBitSet set) {
LongValues(PackedInts.Mutable values, long minValue, long missingValue) {
super(false);
this.values = values;
this.set = set;
this.minValue = minValue;
this.missingValue = missingValue;
}
@Override
public boolean hasValue(int docId) {
return set.get(docId);
return values.get(docId) != missingValue;
}
@Override
public long getValue(int docId) {
return values[docId];
final long value = values.get(docId);
return value == missingValue ? 0L : minValue + value;
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {
private final long[] values;
private final FixedBitSet set;
private final PackedInts.Mutable values;
private final long minValue;
private final long missingValue;
DoubleValues(long[] values, FixedBitSet set) {
DoubleValues(PackedInts.Mutable values, long minValue, long missingValue) {
super(false);
this.values = values;
this.set = set;
this.minValue = minValue;
this.missingValue = missingValue;
}
@Override
public boolean hasValue(int docId) {
return set.get(docId);
return values.get(docId) != missingValue;
}
@Override
public double getValue(int docId) {
return (double) values[docId];
final long value = values.get(docId);
return value == missingValue ? 0L : minValue + value;
}
}
}
@ -252,14 +264,19 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
/**
* Assumes all the values are "set", and docId is used as the index to the value array.
*/
public static class Single extends LongArrayAtomicFieldData {
public static class Single extends PackedArrayAtomicFieldData {
private final PackedInts.Mutable values;
private final long minValue;
/**
* Note, here, we assume that there is no offset by 1 from docId, so position 0
* is the value for docId 0.
*/
public Single(long[] values, int numDocs) {
super(values, numDocs);
public Single(PackedInts.Mutable values, long minValue, int numDocs) {
super(numDocs);
this.values = values;
this.minValue = minValue;
}
@Override
@ -275,49 +292,53 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_LONG);
size = values.ramBytesUsed();
}
return size;
}
@Override
public LongValues getLongValues() {
return new LongValues(values);
return new LongValues(values, minValue);
}
@Override
public DoubleValues getDoubleValues() {
return new DoubleValues(values);
return new DoubleValues(values, minValue);
}
static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {
private final long[] values;
private final PackedInts.Mutable values;
private final long minValue;
LongValues(long[] values) {
LongValues(PackedInts.Mutable values, long minValue) {
super(false);
this.values = values;
this.minValue = minValue;
}
@Override
public long getValue(int docId) {
return values[docId];
return minValue + values.get(docId);
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {
private final long[] values;
private final PackedInts.Mutable values;
private final long minValue;
DoubleValues(long[] values) {
DoubleValues(PackedInts.Mutable values, long minValue) {
super(false);
this.values = values;
this.minValue = minValue;
}
@Override
public double getValue(int docId) {
return (double) values[docId];
return minValue + values.get(docId);
}
}

View File

@ -0,0 +1,203 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import com.google.common.base.Preconditions;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.settings.IndexSettings;
import java.util.EnumSet;
/**
 * Stores numeric data into bit-packed arrays for better memory efficiency.
 * <p>
 * A single implementation serves the byte, short, int and long numeric types; the type is
 * supplied at construction time and only decides how terms are decoded. Per segment the data
 * ends up in one of three layouts: unique values plus ordinals, a dense per-document packed
 * array, or a per-document packed array in which one reserved value marks documents
 * without a value.
 */
public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
/**
 * Builder producing {@link PackedArrayIndexFieldData} instances. The numeric type must be set
 * before {@link #build} is called, otherwise the constructor rejects the {@code null} type.
 */
public static class Builder implements IndexFieldData.Builder {
private NumericType numericType;
/**
 * Sets the numeric type handed to every instance built afterwards.
 *
 * @return this builder, for chaining
 */
public Builder setNumericType(NumericType numericType) {
this.numericType = numericType;
return this;
}
@Override
public IndexFieldData<AtomicNumericFieldData> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
return new PackedArrayIndexFieldData(index, indexSettings, fieldNames, type, cache, numericType);
}
}
private final NumericType numericType;
/**
 * @param numericType one of BYTE, SHORT, INT or LONG; {@code null} and any other type are
 *                    rejected by the precondition checks below
 */
public PackedArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache, NumericType numericType) {
super(index, indexSettings, fieldNames, fieldDataType, cache);
Preconditions.checkNotNull(numericType);
Preconditions.checkArgument(EnumSet.of(NumericType.BYTE, NumericType.SHORT, NumericType.INT, NumericType.LONG).contains(numericType), getClass().getSimpleName() + " only supports integer types, not " + numericType);
this.numericType = numericType;
}
@Override
public NumericType getNumericType() {
return numericType;
}
@Override
public boolean valuesOrdered() {
// because we might have single values? we can dynamically update a flag to reflect that
// based on the atomic field data loaded
return false;
}
/**
 * Loads the field data of the given segment through the cache. An
 * {@link ElasticSearchException} is rethrown as is; any other throwable is wrapped in one
 * with the original kept as its cause.
 */
@Override
public AtomicNumericFieldData load(AtomicReaderContext context) {
try {
return cache.load(context, this);
} catch (Throwable e) {
if (e instanceof ElasticSearchException) {
throw (ElasticSearchException) e;
} else {
throw new ElasticSearchException(e.getMessage(), e);
}
}
}
/**
 * Builds the field data of the given segment directly from its terms, bypassing the cache.
 * <p>
 * Returns {@link PackedArrayAtomicFieldData#EMPTY} when the segment has no terms for the
 * field. Otherwise the decoded unique values are appended to a monotonic buffer while the
 * ordinals are built. Multi-valued fields (or fields for which ordinal removal is disabled)
 * keep the ordinals layout. Single-valued fields get a per-document packed array of deltas
 * from the minimum value, unless the unique values plus ordinals are estimated to be smaller.
 */
@Override
public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
AtomicReader reader = context.reader();
Terms terms = reader.terms(getFieldNames().indexName());
if (terms == null) {
return PackedArrayAtomicFieldData.EMPTY;
}
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
// Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
// longs is going to be monotonically increasing
final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();
// the same overhead ratio is used for the ordinals and for the per-document packed array below
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
try {
BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
BytesRef term;
assert !getNumericType().isFloatingPoint();
// types needing more than 32 bits are decoded as longs, everything else as ints
final boolean indexedAsLong = getNumericType().requiredBits() > 32;
while ((term = iter.next()) != null) {
final long value = indexedAsLong
? NumericUtils.prefixCodedToLong(term)
: NumericUtils.prefixCodedToInt(term);
assert values.size() == 0 || value > values.get(values.size() - 1);
values.add(value);
}
Ordinals build = builder.build(fieldDataType.getSettings());
if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
Docs ordinals = build.ordinals();
// null when no bit set is needed; NOTE(review): presumably that means every document has a value - confirm in OrdinalsBuilder
final FixedBitSet set = builder.buildDocsWithValuesSet();
long minValue, maxValue;
minValue = maxValue = 0;
if (values.size() > 0) {
// values are sorted, so the first and last entries are the extremes
minValue = values.get(0);
maxValue = values.get(values.size() - 1);
}
// Encode document without a value with a special value
long missingValue = 0;
if (set != null) {
if ((maxValue - minValue + 1) == values.size()) {
// values are dense
// no unused value exists inside [minValue, maxValue], so widen the range by one on a side
// that cannot overflow and use the new bound as the marker
if (minValue > Long.MIN_VALUE) {
missingValue = --minValue;
} else {
assert maxValue != Long.MAX_VALUE;
missingValue = ++maxValue;
}
} else {
// the range has at least one hole: use the first value that follows a unique value
// without being one itself
for (long i = 1; i < values.size(); ++i) {
if (values.get(i) > values.get(i - 1) + 1) {
missingValue = values.get(i - 1) + 1;
break;
}
}
}
// from here on the marker is relative to minValue, like every value stored in sValues
missingValue -= minValue; // delta
}
final long delta = maxValue - minValue;
// a negative delta means the subtraction overflowed, so all 64 bits are needed
final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
// there's sweet spot where due to low unique value count, using ordinals will consume less memory
final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
final long uniqueValuesSize = values.ramBytesUsed();
final long ordinalsSize = build.getMemorySizeInBytes();
if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
return new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
}
final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
// pre-fill with the marker only when it is non-zero; presumably a fresh packed array already reads as 0
if (missingValue != 0) {
sValues.fill(0, sValues.size(), missingValue);
}
for (int i = 0; i < reader.maxDoc(); i++) {
final int ord = ordinals.getOrd(i);
// only positive ordinals are written, shifted by one to index the buffer; other slots keep the marker
if (ord > 0) {
sValues.set(i, values.get(ord - 1) - minValue);
}
}
if (set == null) {
return new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc());
} else {
return new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(), missingValue);
}
} else {
return new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
}
} finally {
builder.close();
}
}
/**
 * Values are always compared as longs, whatever the configured numeric type.
 */
@Override
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
return new LongValuesComparatorSource(this, missingValue, sortMode);
}
}

View File

@ -60,8 +60,6 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
}
final PagedBytes bytes = new PagedBytes(15);
int startBytesBPV;
int startNumUniqueTerms;
int maxDoc = reader.maxDoc();
final int termCountHardLimit;
@ -82,19 +80,13 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
// effort to load what we can (see LUCENE-2142)
numUniqueTerms = termCountHardLimit;
}
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms * 4);
startNumUniqueTerms = (int) numUniqueTerms;
} else {
startBytesBPV = 1;
startNumUniqueTerms = 1;
}
final MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
termOrdToBytesOffset.add(0); // first ord is reserved for missing values
boolean preDefineBitsRequired = regex == null && frequency == null;
OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc());
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio);
try {
// 0 is reserved for "unset"
bytes.copyUsingLengthPrefix(new BytesRef());

View File

@ -1,324 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
/**
*/
public abstract class ShortArrayAtomicFieldData extends AtomicNumericFieldData {
public static final ShortArrayAtomicFieldData EMPTY = new Empty();
protected final short[] values;
private final int numDocs;
protected long size = -1;
/**
 * Stores the backing array and the document count for the concrete subclasses.
 *
 * @param values  backing short array; how it is indexed is defined by the subclass
 * @param numDocs value reported by {@link #getNumDocs()}
 */
public ShortArrayAtomicFieldData(short[] values, int numDocs) {
    super(false);
    this.numDocs = numDocs;
    this.values = values;
}
/**
 * No-op: this implementation performs no work on close.
 */
@Override
public void close() {
}
/**
 * Returns the document count that was passed to the constructor.
 */
@Override
public int getNumDocs() {
return numDocs;
}
/**
 * Field data holding no values at all: it has no backing array, reports zero documents and
 * zero memory, and every value accessor hands back the shared empty instance of its type.
 */
static class Empty extends ShortArrayAtomicFieldData {

    Empty() {
        super(null, 0);
    }

    @Override
    public boolean isMultiValued() {
        return false;
    }

    @Override
    public boolean isValuesOrdered() {
        return false;
    }

    @Override
    public long getMemorySizeInBytes() {
        return 0;
    }

    @Override
    public LongValues getLongValues() {
        return LongValues.EMPTY;
    }

    @Override
    public DoubleValues getDoubleValues() {
        return DoubleValues.EMPTY;
    }

    @Override
    public BytesValues getBytesValues() {
        return BytesValues.EMPTY;
    }

    @Override
    public ScriptDocValues getScriptValues() {
        return ScriptDocValues.EMPTY;
    }
}
/**
 * Layout in which the backing array is addressed by ordinal and the {@link Ordinals}
 * map documents to those ordinals.
 */
public static class WithOrdinals extends ShortArrayAtomicFieldData {

    private final Ordinals ordinals;

    /**
     * @param values   values addressed by ordinal
     * @param numDocs  document count reported by {@link #getNumDocs()}
     * @param ordinals per-document ordinals
     */
    public WithOrdinals(short[] values, int numDocs, Ordinals ordinals) {
        super(values, numDocs);
        this.ordinals = ordinals;
    }

    @Override
    public boolean isMultiValued() {
        return ordinals.isMultiValued();
    }

    @Override
    public boolean isValuesOrdered() {
        return true;
    }

    /**
     * Computes the footprint on first use and caches it in {@code size}.
     */
    @Override
    public long getMemorySizeInBytes() {
        if (size != -1) {
            return size;
        }
        size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_SHORT) + ordinals.getMemorySizeInBytes();
        return size;
    }

    @Override
    public LongValues getLongValues() {
        final Ordinals.Docs docs = ordinals.ordinals();
        return new LongValues(values, docs);
    }

    @Override
    public DoubleValues getDoubleValues() {
        final Ordinals.Docs docs = ordinals.ordinals();
        return new DoubleValues(values, docs);
    }

    /**
     * Exposes the ordinal-addressed shorts as longs.
     */
    static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {

        private final short[] byOrd;

        LongValues(short[] values, Ordinals.Docs ordinals) {
            super(ordinals);
            this.byOrd = values;
        }

        @Override
        public long getValueByOrd(int ord) {
            // short widens to long implicitly
            return byOrd[ord];
        }
    }

    /**
     * Exposes the ordinal-addressed shorts as doubles.
     */
    static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {

        private final short[] byOrd;

        DoubleValues(short[] values, Ordinals.Docs ordinals) {
            super(ordinals);
            this.byOrd = values;
        }

        @Override
        public double getValueByOrd(int ord) {
            return byOrd[ord];
        }
    }
}
/**
* A single valued case, where not all values are "set", so we have a FixedBitSet that
* indicates which values have an actual value.
*/
public static class SingleFixedSet extends ShortArrayAtomicFieldData {
private final FixedBitSet set;
public SingleFixedSet(short[] values, int numDocs, FixedBitSet set) {
super(values, numDocs);
this.set = set;
}
@Override
public boolean isMultiValued() {
return false;
}
@Override
public boolean isValuesOrdered() {
return false;
}
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_SHORT) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
}
return size;
}
@Override
public LongValues getLongValues() {
return new LongValues(values, set);
}
@Override
public DoubleValues getDoubleValues() {
return new DoubleValues(values, set);
}
static class LongValues extends org.elasticsearch.index.fielddata.LongValues {
private final short[] values;
private final FixedBitSet set;
LongValues(short[] values, FixedBitSet set) {
super(false);
this.values = values;
this.set = set;
}
@Override
public boolean hasValue(int docId) {
return set.get(docId);
}
@Override
public long getValue(int docId) {
return (long) values[docId];
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {
private final short[] values;
private final FixedBitSet set;
DoubleValues(short[] values, FixedBitSet set) {
super(false);
this.values = values;
this.set = set;
}
@Override
public boolean hasValue(int docId) {
return set.get(docId);
}
@Override
public double getValue(int docId) {
return (double) values[docId];
}
}
}
/**
* Assumes all the values are "set", and docId is used as the index to the value array.
*/
public static class Single extends ShortArrayAtomicFieldData {
/**
* Note, here, we assume that there is no offset by 1 from docId, so position 0
* is the value for docId 0.
*/
public Single(short[] values, int numDocs) {
super(values, numDocs);
}
@Override
public boolean isMultiValued() {
return false;
}
@Override
public boolean isValuesOrdered() {
return false;
}
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_SHORT);
}
return size;
}
@Override
public LongValues getLongValues() {
return new LongValues(values);
}
@Override
public DoubleValues getDoubleValues() {
return new DoubleValues(values);
}
static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {
private final short[] values;
LongValues(short[] values) {
super(false);
this.values = values;
}
@Override
public long getValue(int docId) {
return (long) values[docId];
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {
private final short[] values;
DoubleValues(short[] values) {
super(false);
this.values = values;
}
@Override
public double getValue(int docId) {
return (double) values[docId];
}
}
}
}

View File

@ -1,199 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.plain;
import gnu.trove.iterator.TShortIterator;
import gnu.trove.list.array.TShortArrayList;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.ShortValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.settings.IndexSettings;
/**
 * Index-level field data for {@code short} numeric fields. Values are read from the field's
 * indexed terms into a {@link ShortArrayAtomicFieldData}; when the {@code optimize_type}
 * setting is enabled (the default) and every value fits in a byte, loading is delegated to
 * {@code ByteArrayIndexFieldData.build} instead.
 */
public class ShortArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {

    /** Factory that creates {@link ShortArrayIndexFieldData} instances. */
    public static class Builder implements IndexFieldData.Builder {
        @Override
        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
            return new ShortArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
        }
    }

    public ShortArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
        super(index, indexSettings, fieldNames, fieldDataType, cache);
    }

    @Override
    public NumericType getNumericType() {
        return NumericType.SHORT;
    }

    @Override
    public boolean valuesOrdered() {
        // because we might have single values? we can dynamically update a flag to reflect that
        // based on the atomic field data loaded
        return false;
    }

    /**
     * Loads the field data for the given reader context through the cache.
     *
     * @throws ElasticSearchException if loading fails; any other failure is wrapped in an
     *                                {@link ElasticSearchException} that keeps it as the cause
     */
    @Override
    public AtomicNumericFieldData load(AtomicReaderContext context) {
        try {
            return cache.load(context, this);
        } catch (Throwable e) {
            if (e instanceof ElasticSearchException) {
                throw (ElasticSearchException) e;
            } else {
                throw new ElasticSearchException(e.getMessage(), e);
            }
        }
    }

    /**
     * Builds the field data directly from the reader's terms, bypassing the cache.
     * Returns {@link ShortArrayAtomicFieldData#EMPTY} when the field has no terms.
     */
    @Override
    public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
        AtomicReader reader = context.reader();
        Terms terms = reader.terms(getFieldNames().indexName());
        if (terms == null) {
            return ShortArrayAtomicFieldData.EMPTY;
        }
        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
        final TShortArrayList values = new TShortArrayList();
        values.add((short) 0); // first "t" indicates null value
        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
        try {
            BytesRef term;
            // track the value range so we can tell whether everything fits in a byte
            short max = Short.MIN_VALUE;
            short min = Short.MAX_VALUE;
            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
            while ((term = iter.next()) != null) {
                short value = (short) NumericUtils.prefixCodedToInt(term);
                values.add(value);
                if (value > max) {
                    max = value;
                }
                if (value < min) {
                    min = value;
                }
            }
            Ordinals build = builder.build(fieldDataType.getSettings());
            if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
                // if we can fit all our values in a byte we should do this!
                if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
                    return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
                        @Override
                        public byte get(int index) {
                            return (byte) values.get(index);
                        }
                        @Override
                        public byte[] toArray() {
                            byte[] bValues = new byte[values.size()];
                            int i = 0;
                            for (TShortIterator it = values.iterator(); it.hasNext(); ) {
                                bValues[i++] = (byte) it.next();
                            }
                            return bValues;
                        }
                    });
                }
            }
            return build(reader, fieldDataType, builder, build, new BuilderShorts() {
                @Override
                public short get(int index) {
                    return values.get(index);
                }
                @Override
                public short[] toArray() {
                    return values.toArray();
                }
                @Override
                public int size() {
                    return values.size();
                }
            });
        } finally {
            builder.close();
        }
    }

    /** Read-only view over the collected values, indexed by ordinal. */
    static interface BuilderShorts {
        short get(int index);
        short[] toArray();
        int size();
    }

    /**
     * Picks the atomic field data layout for the collected values.
     * <p>
     * Multi-valued data, or data for which ordinals must be kept, uses
     * {@link ShortArrayAtomicFieldData.WithOrdinals}. Otherwise the estimated size of a
     * per-document array is compared against the size of the unique values plus the ordinals,
     * and the cheaper representation is chosen.
     *
     * @param reader        reader whose {@code maxDoc()} sizes the per-document array
     * @param fieldDataType field data type, consulted for the remove-ordinals setting
     * @param builder       ordinals builder, used to obtain the set of documents that have a value
     * @param build         the built ordinals
     * @param values        the collected values, indexed by ordinal
     */
    static ShortArrayAtomicFieldData build(AtomicReader reader, FieldDataType fieldDataType, OrdinalsBuilder builder, Ordinals build, BuilderShorts values) {
        final int maxDoc = reader.maxDoc();
        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
            Docs ordinals = build.ordinals();
            final FixedBitSet set = builder.buildDocsWithValuesSet();
            // there's a sweet spot where, due to low unique value count, using ordinals will consume less memory
            // (products are widened to long first: an int product overflows for very large segments)
            long singleValuesArraySize = (long) maxDoc * RamUsage.NUM_BYTES_SHORT + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
            long uniqueValuesArraySize = (long) values.size() * RamUsage.NUM_BYTES_SHORT;
            long ordinalsSize = build.getMemorySizeInBytes();
            if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
                return new ShortArrayAtomicFieldData.WithOrdinals(values.toArray(), maxDoc, build);
            }
            // resolve each document's ordinal to its value so lookups can go straight by doc id
            short[] sValues = new short[maxDoc];
            for (int i = 0; i < maxDoc; i++) {
                sValues[i] = values.get(ordinals.getOrd(i));
            }
            if (set == null) {
                return new ShortArrayAtomicFieldData.Single(sValues, maxDoc);
            } else {
                return new ShortArrayAtomicFieldData.SingleFixedSet(sValues, maxDoc, set);
            }
        } else {
            return new ShortArrayAtomicFieldData.WithOrdinals(values.toArray(), maxDoc, build);
        }
    }

    @Override
    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
        return new ShortValuesComparatorSource(this, missingValue, sortMode);
    }
}

View File

@ -0,0 +1,147 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.benchmark.fielddata;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import java.util.Random;
/**
 * Stand-alone benchmark: for every {@link Data} distribution it indexes one million documents
 * into an in-memory Lucene index, loads the "long" field data for it, and prints the loading
 * time, the implementation class that was chosen, and its measured versus self-reported size.
 */
public class LongFieldDataBenchmark {

    private static final Random RANDOM = new Random();
    private static final int SECONDS_PER_YEAR = 60 * 60 * 24 * 365;

    /** Value distributions to benchmark: how many values a document gets and what they look like. */
    public static enum Data {
        SINGLE_VALUES_DENSE_ENUM {
            @Override
            public int numValues() {
                return 1;
            }

            @Override
            public long nextValue() {
                return RANDOM.nextInt(16);
            }
        },
        SINGLE_VALUED_DENSE_DATE {
            @Override
            public int numValues() {
                return 1;
            }

            @Override
            public long nextValue() {
                return randomDateMillis();
            }
        },
        MULTI_VALUED_DATE {
            @Override
            public int numValues() {
                return RANDOM.nextInt(3);
            }

            @Override
            public long nextValue() {
                return randomDateMillis();
            }
        },
        MULTI_VALUED_ENUM {
            @Override
            public int numValues() {
                return RANDOM.nextInt(3);
            }

            @Override
            public long nextValue() {
                return 3 + RANDOM.nextInt(8);
            }
        },
        SINGLE_VALUED_SPARSE_RANDOM {
            @Override
            public int numValues() {
                if (RANDOM.nextFloat() < 0.1f) {
                    return 1;
                }
                return 0;
            }

            @Override
            public long nextValue() {
                return RANDOM.nextLong();
            }
        },
        MULTI_VALUED_SPARSE_RANDOM {
            @Override
            public int numValues() {
                if (RANDOM.nextFloat() < 0.1f) {
                    return 1 + RANDOM.nextInt(5);
                }
                return 0;
            }

            @Override
            public long nextValue() {
                return RANDOM.nextLong();
            }
        },
        MULTI_VALUED_DENSE_RANDOM {
            @Override
            public int numValues() {
                return 1 + RANDOM.nextInt(3);
            }

            @Override
            public long nextValue() {
                return RANDOM.nextLong();
            }
        };

        /** Number of values to index for the next document. */
        public abstract int numValues();

        /** Next value to index. */
        public abstract long nextValue();
    }

    /** Random timestamp in milliseconds, somewhere in-between 2010 and 2012. */
    private static long randomDateMillis() {
        final long secondsSinceEpoch = 40L * SECONDS_PER_YEAR + RANDOM.nextInt(2 * SECONDS_PER_YEAR);
        return 1000L * secondsSinceEpoch;
    }

    /** Writes {@code numDocs} documents drawn from {@code distribution} and merges them into one segment. */
    private static void populate(RAMDirectory directory, IndexWriterConfig config, Data distribution, String fieldName, int numDocs) throws Exception {
        final IndexWriter indexWriter = new IndexWriter(directory, config);
        for (int docIndex = 0; docIndex < numDocs; ++docIndex) {
            final Document doc = new Document();
            for (int remaining = distribution.numValues(); remaining > 0; --remaining) {
                doc.add(new LongField(fieldName, distribution.nextValue(), Store.NO));
            }
            indexWriter.addDocument(doc);
        }
        indexWriter.forceMerge(1);
        indexWriter.close();
    }

    public static void main(String[] args) throws Exception {
        final IndexWriterConfig iwc = new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer());
        final String fieldName = "f";
        final int numDocs = 1000000;
        System.out.println("Data\tLoading time\tImplementation\tActual size\tExpected size");
        for (Data distribution : Data.values()) {
            final RAMDirectory directory = new RAMDirectory();
            populate(directory, iwc, distribution, fieldName, numDocs);

            final DirectoryReader reader = DirectoryReader.open(directory);
            final IndexFieldDataService service = new IndexFieldDataService(new Index("dummy"));
            final IndexNumericFieldData<AtomicNumericFieldData> fieldData = service.getForField(new FieldMapper.Names(fieldName), new FieldDataType("long"));

            // only the direct load is timed
            final long startNanos = System.nanoTime();
            final AtomicNumericFieldData loaded = fieldData.loadDirect(SlowCompositeReaderWrapper.wrap(reader).getContext());
            final long loadingTimeMs = (System.nanoTime() - startNanos) / 1000 / 1000;

            final String implementation = loaded.getClass().getSimpleName();
            final String actualSize = RamUsageEstimator.humanSizeOf(loaded.getLongValues());
            final String expectedSize = RamUsageEstimator.humanReadableUnits(loaded.getMemorySizeInBytes());
            System.out.println(distribution + "\t" + loadingTimeMs + "\t" + implementation + "\t" + actualSize + "\t" + expectedSize);
            reader.close();
        }
    }
}

View File

@ -34,7 +34,6 @@ import org.elasticsearch.search.facet.FacetBuilder;
import java.util.Date;
import static org.elasticsearch.client.Requests.createIndexRequest;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
@ -63,7 +62,7 @@ public class HistogramFacetSearchBenchmark {
Client client = node1.client();
long COUNT = SizeValue.parseSizeValue("5m").singles();
long COUNT = SizeValue.parseSizeValue("20m").singles();
int BATCH = 500;
int QUERY_WARMUP = 20;
int QUERY_COUNT = 200;
@ -76,9 +75,32 @@ public class HistogramFacetSearchBenchmark {
Thread.sleep(10000);
try {
client.admin().indices().create(createIndexRequest("test").settings(
settingsBuilder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0)))
.actionGet();
client.admin().indices().prepareCreate("test")
.setSettings(settingsBuilder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0))
.addMapping("test", jsonBuilder()
.startObject()
.startObject("test")
.startObject("properties")
.startObject("name")
.startObject("fields")
.startObject("l_value")
.field("type", "long")
.endObject()
.startObject("i_value")
.field("type", "integer")
.endObject()
.startObject("s_value")
.field("type", "short")
.endObject()
.startObject("b_value")
.field("type", "byte")
.endObject()
.endObject()
.endObject()
.endObject()
.endObject()
.endObject())
.execute().actionGet();
StopWatch stopWatch = new StopWatch().start();
@ -90,9 +112,13 @@ public class HistogramFacetSearchBenchmark {
BulkRequestBuilder request = client.prepareBulk();
for (int j = 0; j < BATCH; j++) {
counter++;
final long value = lValues[counter % lValues.length];
XContentBuilder source = jsonBuilder().startObject()
.field("id", Integer.valueOf(counter))
.field("l_value", lValues[counter % lValues.length])
.field("l_value", value)
.field("i_value", (int) value)
.field("s_value", (short) value)
.field("b_value", (byte) value)
.field("date", new Date())
.endObject();
request.add(Requests.indexRequest("test").type("type1").id(Integer.toString(counter))
@ -125,6 +151,9 @@ public class HistogramFacetSearchBenchmark {
SearchResponse searchResponse = client.prepareSearch()
.setQuery(matchAllQuery())
.addFacet(histogramFacet("l_value").field("l_value").interval(4))
.addFacet(histogramFacet("i_value").field("i_value").interval(4))
.addFacet(histogramFacet("s_value").field("s_value").interval(4))
.addFacet(histogramFacet("b_value").field("b_value").interval(4))
.addFacet(histogramFacet("date").field("date").interval(1000))
.execute().actionGet();
if (j == 0) {
@ -137,30 +166,33 @@ public class HistogramFacetSearchBenchmark {
System.out.println("--> Warmup DONE");
long totalQueryTime = 0;
for (int j = 0; j < QUERY_COUNT; j++) {
SearchResponse searchResponse = client.prepareSearch()
.setQuery(matchAllQuery())
.addFacet(histogramFacet("l_value").field("l_value").interval(4))
.execute().actionGet();
if (searchResponse.getHits().totalHits() != COUNT) {
System.err.println("--> mismatch on hits");
for (String field : new String[] {"b_value", "s_value", "i_value", "l_value"}) {
totalQueryTime = 0;
for (int j = 0; j < QUERY_COUNT; j++) {
SearchResponse searchResponse = client.prepareSearch()
.setQuery(matchAllQuery())
.addFacet(histogramFacet(field).field(field).interval(4))
.execute().actionGet();
if (searchResponse.getHits().totalHits() != COUNT) {
System.err.println("--> mismatch on hits");
}
totalQueryTime += searchResponse.getTookInMillis();
}
totalQueryTime += searchResponse.getTookInMillis();
}
System.out.println("--> Histogram Facet (l_value) " + (totalQueryTime / QUERY_COUNT) + "ms");
System.out.println("--> Histogram Facet (" + field + ") " + (totalQueryTime / QUERY_COUNT) + "ms");
totalQueryTime = 0;
for (int j = 0; j < QUERY_COUNT; j++) {
SearchResponse searchResponse = client.prepareSearch()
.setQuery(matchAllQuery())
.addFacet(histogramFacet("l_value").field("l_value").valueField("l_value").interval(4))
.execute().actionGet();
if (searchResponse.getHits().totalHits() != COUNT) {
System.err.println("--> mismatch on hits");
totalQueryTime = 0;
for (int j = 0; j < QUERY_COUNT; j++) {
SearchResponse searchResponse = client.prepareSearch()
.setQuery(matchAllQuery())
.addFacet(histogramFacet("l_value").field("l_value").valueField("l_value").interval(4))
.execute().actionGet();
if (searchResponse.getHits().totalHits() != COUNT) {
System.err.println("--> mismatch on hits");
}
totalQueryTime += searchResponse.getTookInMillis();
}
totalQueryTime += searchResponse.getTookInMillis();
System.out.println("--> Histogram Facet (" + field + "/" + field + ") " + (totalQueryTime / QUERY_COUNT) + "ms");
}
System.out.println("--> Histogram Facet (l_value/l_value) " + (totalQueryTime / QUERY_COUNT) + "ms");
totalQueryTime = 0;
for (int j = 0; j < QUERY_COUNT; j++) {

View File

@ -20,12 +20,7 @@
package org.elasticsearch.test.unit.index.fielddata;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.*;
import org.apache.lucene.store.RAMDirectory;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.Index;
@ -55,7 +50,8 @@ public abstract class AbstractFieldDataTests {
@BeforeMethod
public void setup() throws Exception {
ifdService = new IndexFieldDataService(new Index("test"));
writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(Lucene.VERSION, new StandardAnalyzer(Lucene.VERSION)));
// LogByteSizeMP to preserve doc ID order
writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(Lucene.VERSION, new StandardAnalyzer(Lucene.VERSION)).setMergePolicy(new LogByteSizeMergePolicy()));
}
protected AtomicReaderContext refreshReader() throws Exception {

View File

@ -1,34 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.index.fielddata;
import org.elasticsearch.index.fielddata.FieldDataType;
/**
 * Re-runs the tests inherited from {@link IntFieldDataTests} against the {@code "byte"}
 * field data type.
 */
public class ByteFieldDataTests extends IntFieldDataTests {

    /** Field data type under test: {@code "byte"}. */
    @Override
    protected FieldDataType getFieldDataType() {
        final FieldDataType byteType = new FieldDataType("byte");
        return byteType;
    }
}

View File

@ -1,227 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.index.fielddata;
import org.apache.lucene.document.*;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.IntArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
/**
 * Numeric field data tests run against the {@code "int"} field data type, plus checks that
 * "long" field data picks the byte, short or int backed implementation when the values fit.
 */
public class IntFieldDataTests extends NumericFieldDataTests {

    @Override
    protected FieldDataType getFieldDataType() {
        return new FieldDataType("int");
    }

    @Test
    public void testOptimizeTypeByte() throws Exception {
        AtomicNumericFieldData fieldData = loadTwoLongDocs(Byte.MAX_VALUE, Byte.MIN_VALUE);
        assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class));
        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE));
        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE));
    }

    @Test
    public void testOptimizeTypeShort() throws Exception {
        AtomicNumericFieldData fieldData = loadTwoLongDocs(Short.MAX_VALUE, Short.MIN_VALUE);
        assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class));
        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE));
        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE));
    }

    @Test
    public void testOptimizeTypeInteger() throws Exception {
        AtomicNumericFieldData fieldData = loadTwoLongDocs(Integer.MAX_VALUE, Integer.MIN_VALUE);
        assertThat(fieldData, instanceOf(IntArrayAtomicFieldData.class));
        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE));
        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE));
    }

    @Override
    protected void fillSingleValueAllSet() throws Exception {
        indexIntValuesDoc("1", 2);
        indexIntValuesDoc("2", 1);
        indexIntValuesDoc("3", 3);
    }

    @Override
    protected void fillSingleValueWithMissing() throws Exception {
        indexIntValuesDoc("1", 2);
        indexIntValuesDoc("2"); // no "value" field: this is the missing one
        indexIntValuesDoc("3", 3);
    }

    @Override
    protected void fillMultiValueAllSet() throws Exception {
        indexIntValuesDoc("1", 2, 4);
        indexIntValuesDoc("2", 1);
        indexIntValuesDoc("3", 3);
    }

    @Override
    protected void fillMultiValueWithMissing() throws Exception {
        indexIntValuesDoc("1", 2, 4);
        indexIntValuesDoc("2"); // no "value" field: this is the missing one
        indexIntValuesDoc("3", 3);
    }

    protected void fillExtendedMvSet() throws Exception {
        indexIntValuesDoc("1", 2, 4);
        indexIntValuesDoc("2");
        indexIntValuesDoc("3", 3);
        writer.commit();

        indexIntValuesDoc("4", 4, 5, 6);
        indexIntValuesDoc("5", 6, 7, 8);
        indexIntValuesDoc("6");
        indexIntValuesDoc("7", 8, 9, 10);
        writer.commit();

        indexIntValuesDoc("8", -8, -9, -10);
    }

    /**
     * Adds one document with the given {@code _id} and one {@code "value"} int field per
     * supplied value, in the order given; no values means the document has no "value" field.
     */
    private void indexIntValuesDoc(String id, int... fieldValues) throws Exception {
        Document doc = new Document();
        doc.add(new StringField("_id", id, Field.Store.NO));
        for (int fieldValue : fieldValues) {
            doc.add(new IntField("value", fieldValue, Field.Store.NO));
        }
        writer.addDocument(doc);
    }

    /**
     * Indexes two documents ("1" and "2") holding the given long values in the "value" field,
     * then loads that field as "long" field data from a refreshed reader.
     */
    private AtomicNumericFieldData loadTwoLongDocs(long firstValue, long secondValue) throws Exception {
        Document doc = new Document();
        doc.add(new StringField("_id", "1", Field.Store.NO));
        doc.add(new LongField("value", firstValue, Field.Store.NO));
        writer.addDocument(doc);

        doc = new Document();
        doc.add(new StringField("_id", "2", Field.Store.NO));
        doc.add(new LongField("value", secondValue, Field.Store.NO));
        writer.addDocument(doc);

        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
        return indexFieldData.load(refreshReader());
    }
}

View File

@ -19,92 +19,39 @@
package org.elasticsearch.test.unit.index.fielddata;
import gnu.trove.iterator.TLongIterator;
import gnu.trove.set.TDoubleSet;
import gnu.trove.set.TLongSet;
import gnu.trove.set.hash.TDoubleHashSet;
import gnu.trove.set.hash.TLongHashSet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.IntArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.LongArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.plain.PackedArrayAtomicFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.joda.time.DateTimeZone;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
/**
* Tests for all integer types (byte, short, int, long).
*/
public class LongFieldDataTests extends NumericFieldDataTests {
@Override
protected FieldDataType getFieldDataType() {
// we don't want to optimize the type so it will always be a long...
return new FieldDataType("long", ImmutableSettings.builder().put("optimize_type", false));
}
/**
 * Indexes two documents whose {@code "value"} field holds {@code Byte.MAX_VALUE} and
 * {@code Byte.MIN_VALUE}, loads {@code "long"} field data for it and asserts that the loaded
 * instance is a {@code ByteArrayAtomicFieldData} returning both extremes unchanged.
 */
@Test
public void testOptimizeTypeByte() throws Exception {
    final Document maxDoc = new Document();
    maxDoc.add(new StringField("_id", "1", Field.Store.NO));
    maxDoc.add(new LongField("value", Byte.MAX_VALUE, Field.Store.NO));
    writer.addDocument(maxDoc);

    final Document minDoc = new Document();
    minDoc.add(new StringField("_id", "2", Field.Store.NO));
    minDoc.add(new LongField("value", Byte.MIN_VALUE, Field.Store.NO));
    writer.addDocument(minDoc);

    final FieldMapper.Names fieldNames = new FieldMapper.Names("value");
    final IndexNumericFieldData numericFieldData = ifdService.getForField(fieldNames, new FieldDataType("long"));
    final AtomicNumericFieldData loaded = numericFieldData.load(refreshReader());

    assertThat(loaded, instanceOf(ByteArrayAtomicFieldData.class));
    final long expectedMax = Byte.MAX_VALUE;
    final long expectedMin = Byte.MIN_VALUE;
    assertThat(loaded.getLongValues().getValue(0), equalTo(expectedMax));
    assertThat(loaded.getLongValues().getValue(1), equalTo(expectedMin));
}
/**
 * Indexes two documents whose {@code "value"} field holds {@code Short.MAX_VALUE} and
 * {@code Short.MIN_VALUE}, loads {@code "long"} field data for it and asserts that the loaded
 * instance is a {@code ShortArrayAtomicFieldData} returning both extremes unchanged.
 */
@Test
public void testOptimizeTypeShort() throws Exception {
    final Document maxDoc = new Document();
    maxDoc.add(new StringField("_id", "1", Field.Store.NO));
    maxDoc.add(new LongField("value", Short.MAX_VALUE, Field.Store.NO));
    writer.addDocument(maxDoc);

    final Document minDoc = new Document();
    minDoc.add(new StringField("_id", "2", Field.Store.NO));
    minDoc.add(new LongField("value", Short.MIN_VALUE, Field.Store.NO));
    writer.addDocument(minDoc);

    final FieldMapper.Names fieldNames = new FieldMapper.Names("value");
    final IndexNumericFieldData numericFieldData = ifdService.getForField(fieldNames, new FieldDataType("long"));
    final AtomicNumericFieldData loaded = numericFieldData.load(refreshReader());

    assertThat(loaded, instanceOf(ShortArrayAtomicFieldData.class));
    final long expectedMax = Short.MAX_VALUE;
    final long expectedMin = Short.MIN_VALUE;
    assertThat(loaded.getLongValues().getValue(0), equalTo(expectedMax));
    assertThat(loaded.getLongValues().getValue(1), equalTo(expectedMin));
}
@Test
public void testOptimizeTypeInteger() throws Exception {
Document d = new Document();
d.add(new StringField("_id", "1", Field.Store.NO));
d.add(new LongField("value", Integer.MAX_VALUE, Field.Store.NO));
writer.addDocument(d);
d = new Document();
d.add(new StringField("_id", "2", Field.Store.NO));
d.add(new LongField("value", Integer.MIN_VALUE, Field.Store.NO));
writer.addDocument(d);
IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
assertThat(fieldData, instanceOf(IntArrayAtomicFieldData.class));
assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE));
assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE));
return new FieldDataType("long", ImmutableSettings.builder());
}
@Test
@ -121,7 +68,7 @@ public class LongFieldDataTests extends NumericFieldDataTests {
IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
assertThat(fieldData, instanceOf(LongArrayAtomicFieldData.class));
assertThat(fieldData, instanceOf(PackedArrayAtomicFieldData.class));
assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE + 1l));
assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE - 1l));
}
@ -264,4 +211,171 @@ public class LongFieldDataTests extends NumericFieldDataTests {
writer.addDocument(d);
}
private static final int SECONDS_PER_YEAR = 60 * 60 * 24 * 365;
// TODO: use random() when migrating to JUnit
/**
 * Shapes of randomly generated per-document data used by the round-trip tests below.
 * Each constant decides, from the supplied {@link Random}, how many values a document gets
 * ({@link #numValues(Random)}) and what each value is ({@link #nextValue(Random)}).
 */
public enum Data {
    /** Exactly one value per document, drawn from 1 to 16 inclusive. */
    SINGLE_VALUED_DENSE_ENUM {
        @Override
        public int numValues(Random r) {
            return 1;
        }

        @Override
        public long nextValue(Random r) {
            return 1 + r.nextInt(16);
        }
    },
    /** Exactly one date-like value per document. */
    SINGLE_VALUED_DENSE_DATE {
        @Override
        public int numValues(Random r) {
            return 1;
        }

        @Override
        public long nextValue(Random r) {
            return randomDateMillis(r);
        }
    },
    /** Zero to two date-like values per document. */
    MULTI_VALUED_DATE {
        @Override
        public int numValues(Random r) {
            return r.nextInt(3);
        }

        @Override
        public long nextValue(Random r) {
            return randomDateMillis(r);
        }
    },
    /** Zero to two values per document, each drawn from 3 to 10 inclusive. */
    MULTI_VALUED_ENUM {
        @Override
        public int numValues(Random r) {
            return r.nextInt(3);
        }

        @Override
        public long nextValue(Random r) {
            return 3 + r.nextInt(8);
        }
    },
    /** One arbitrary long for roughly one document in ten, none otherwise. */
    SINGLE_VALUED_SPARSE_RANDOM {
        @Override
        public int numValues(Random r) {
            return r.nextFloat() < 0.1f ? 1 : 0;
        }

        @Override
        public long nextValue(Random r) {
            return r.nextLong();
        }
    },
    /** One to five arbitrary longs for roughly one document in ten, none otherwise. */
    MULTI_VALUED_SPARSE_RANDOM {
        @Override
        public int numValues(Random r) {
            return r.nextFloat() < 0.1f ? 1 + r.nextInt(5) : 0;
        }

        @Override
        public long nextValue(Random r) {
            return r.nextLong();
        }
    },
    /** One to three arbitrary longs per document. */
    MULTI_VALUED_DENSE_RANDOM {
        @Override
        public int numValues(Random r) {
            return 1 + r.nextInt(3);
        }

        @Override
        public long nextValue(Random r) {
            return r.nextLong();
        }
    };

    /**
     * @param r source of randomness
     * @return how many values to generate for the next document
     */
    public abstract int numValues(Random r);

    /**
     * @param r source of randomness
     * @return the next value to index
     */
    public abstract long nextValue(Random r);

    /**
     * Returns a timestamp in milliseconds somewhere in-between 2010 and 2012, computed as
     * 40 to 42 {@code SECONDS_PER_YEAR} periods after the epoch.
     */
    private static long randomDateMillis(Random r) {
        return 1000L * (40L * SECONDS_PER_YEAR + r.nextInt(2 * SECONDS_PER_YEAR));
    }
}
/**
 * Indexes one document per entry of {@code values} (every long of an entry becomes a
 * {@code "value"} field), loads the numeric field data for {@code "value"} and asserts that
 * both the long view and the double view expose exactly the indexed values for each document.
 *
 * @param values expected values per document; the list position is used as the document index
 * @throws Exception if indexing or loading the field data fails
 */
private void test(List<TLongSet> values) throws Exception {
    final StringField idField = new StringField("_id", "", Field.Store.NO);
    int nextId = 0;
    for (TLongSet docValues : values) {
        final Document document = new Document();
        idField.setStringValue(Integer.toString(nextId++));
        document.add(idField);
        final TLongIterator longs = docValues.iterator();
        while (longs.hasNext()) {
            document.add(new LongField("value", longs.next(), Field.Store.NO));
        }
        writer.addDocument(document);
    }
    // NOTE(review): presumably merges to a single segment so that document indices
    // match insertion order below - confirm.
    writer.forceMerge(1);

    final IndexNumericFieldData numericFieldData = getForField("value");
    final AtomicNumericFieldData loaded = numericFieldData.load(refreshReader());
    final LongValues longValues = loaded.getLongValues();
    final DoubleValues doubleValues = loaded.getDoubleValues();

    // Scratch sets, cleared and refilled for every document.
    final TLongSet actualLongs = new TLongHashSet();
    final TDoubleSet actualDoubles = new TDoubleHashSet();
    for (int docId = 0; docId < values.size(); ++docId) {
        final TLongSet expectedLongs = values.get(docId);
        final boolean expectValue = !expectedLongs.isEmpty();
        assertThat(longValues.hasValue(docId), equalTo(expectValue));
        assertThat(doubleValues.hasValue(docId), equalTo(expectValue));
        if (!expectValue) {
            // Documents without a value must read back as zero.
            assertThat(longValues.getValue(docId), equalTo(0L));
            assertThat(doubleValues.getValue(docId), equalTo(0d));
        }

        actualLongs.clear();
        final LongValues.Iter longIter = longValues.getIter(docId);
        while (longIter.hasNext()) {
            actualLongs.add(longIter.next());
        }
        assertThat(actualLongs, equalTo(expectedLongs));

        // The double view is expected to hold the same values, widened to double.
        final TDoubleSet expectedDoubles = new TDoubleHashSet();
        final TLongIterator expectedIter = expectedLongs.iterator();
        while (expectedIter.hasNext()) {
            expectedDoubles.add((double) expectedIter.next());
        }
        actualDoubles.clear();
        final DoubleValues.Iter doubleIter = doubleValues.getIter(docId);
        while (doubleIter.hasNext()) {
            actualDoubles.add(doubleIter.next());
        }
        assertThat(actualDoubles, equalTo(expectedDoubles));
    }
}
/**
 * Generates between 1000 and 19999 documents' worth of values according to {@code data},
 * using a {@link Random} seeded with the constant's ordinal so each run is reproducible,
 * and hands them to {@code test(List)} for the index-and-verify round trip.
 *
 * @param data the data shape to generate
 * @throws Exception if the round-trip check fails to index or load
 */
private void test(Data data) throws Exception {
    final Random random = new Random(data.ordinal());
    final int docCount = 1000 + random.nextInt(19000);
    final List<TLongSet> perDocValues = new ArrayList<TLongSet>(docCount);
    while (perDocValues.size() < docCount) {
        final int valueCount = data.numValues(random);
        // A set: duplicate draws for the same document collapse into one value.
        final TLongSet docValues = new TLongHashSet(valueCount);
        for (int remaining = valueCount; remaining > 0; --remaining) {
            docValues.add(data.nextValue(random));
        }
        perDocValues.add(docValues);
    }
    test(perDocValues);
}
/** Runs the generated-data round-trip check with {@link Data#SINGLE_VALUED_DENSE_ENUM}. */
// Annotated explicitly, like the other tests in this file, so TestNG picks it up
// without relying on a class-level @Test.
@Test
public void testSingleValuedDenseEnum() throws Exception {
    test(Data.SINGLE_VALUED_DENSE_ENUM);
}
/** Runs the generated-data round-trip check with {@link Data#SINGLE_VALUED_DENSE_DATE}. */
// Annotated explicitly, like the other tests in this file, so TestNG picks it up
// without relying on a class-level @Test.
@Test
public void testSingleValuedDenseDate() throws Exception {
    test(Data.SINGLE_VALUED_DENSE_DATE);
}
/** Runs the generated-data round-trip check with {@link Data#SINGLE_VALUED_SPARSE_RANDOM}. */
// Annotated explicitly, like the other tests in this file, so TestNG picks it up
// without relying on a class-level @Test.
@Test
public void testSingleValuedSparseRandom() throws Exception {
    test(Data.SINGLE_VALUED_SPARSE_RANDOM);
}
/** Runs the generated-data round-trip check with {@link Data#MULTI_VALUED_DATE}. */
// Annotated explicitly, like the other tests in this file, so TestNG picks it up
// without relying on a class-level @Test.
@Test
public void testMultiValuedDate() throws Exception {
    test(Data.MULTI_VALUED_DATE);
}
/** Runs the generated-data round-trip check with {@link Data#MULTI_VALUED_ENUM}. */
// Annotated explicitly, like the other tests in this file, so TestNG picks it up
// without relying on a class-level @Test.
@Test
public void testMultiValuedEnum() throws Exception {
    test(Data.MULTI_VALUED_ENUM);
}
/** Runs the generated-data round-trip check with {@link Data#MULTI_VALUED_SPARSE_RANDOM}. */
// Annotated explicitly, like the other tests in this file, so TestNG picks it up
// without relying on a class-level @Test.
@Test
public void testMultiValuedSparseRandom() throws Exception {
    test(Data.MULTI_VALUED_SPARSE_RANDOM);
}
/** Runs the generated-data round-trip check with {@link Data#MULTI_VALUED_DENSE_RANDOM}. */
// Annotated explicitly, like the other tests in this file, so TestNG picks it up
// without relying on a class-level @Test.
@Test
public void testMultiValuedDenseRandom() throws Exception {
    test(Data.MULTI_VALUED_DENSE_RANDOM);
}
}

View File

@ -1,85 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.index.fielddata;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
/**
 * Field data tests for the {@code "short"} type; inherits the tests of
 * {@code IntFieldDataTests} and adds checks that small values are stored in byte- or
 * short-backed field data.
 */
public class ShortFieldDataTests extends IntFieldDataTests {

    /** Returns the {@code "short"} field data type. */
    @Override
    protected FieldDataType getFieldDataType() {
        return new FieldDataType("short");
    }

    /**
     * Indexes {@code Byte.MAX_VALUE} and {@code Byte.MIN_VALUE} and asserts that the loaded
     * field data is a {@code ByteArrayAtomicFieldData} returning both values unchanged.
     */
    @Test
    public void testOptimizeTypeByte() throws Exception {
        final Document maxDoc = new Document();
        maxDoc.add(new StringField("_id", "1", Field.Store.NO));
        maxDoc.add(new LongField("value", Byte.MAX_VALUE, Field.Store.NO));
        writer.addDocument(maxDoc);

        final Document minDoc = new Document();
        minDoc.add(new StringField("_id", "2", Field.Store.NO));
        minDoc.add(new LongField("value", Byte.MIN_VALUE, Field.Store.NO));
        writer.addDocument(minDoc);

        // NOTE(review): requests "long" field data although getFieldDataType() of this
        // class is "short" - confirm this is intentional.
        final FieldMapper.Names fieldNames = new FieldMapper.Names("value");
        final IndexNumericFieldData numericFieldData = ifdService.getForField(fieldNames, new FieldDataType("long"));
        final AtomicNumericFieldData loaded = numericFieldData.load(refreshReader());

        assertThat(loaded, instanceOf(ByteArrayAtomicFieldData.class));
        final long expectedMax = Byte.MAX_VALUE;
        final long expectedMin = Byte.MIN_VALUE;
        assertThat(loaded.getLongValues().getValue(0), equalTo(expectedMax));
        assertThat(loaded.getLongValues().getValue(1), equalTo(expectedMin));
    }

    /**
     * Indexes {@code Short.MAX_VALUE} and {@code Short.MIN_VALUE} and asserts that the loaded
     * field data is a {@code ShortArrayAtomicFieldData} returning both values unchanged.
     */
    @Test
    public void testOptimizeTypeShort() throws Exception {
        final Document maxDoc = new Document();
        maxDoc.add(new StringField("_id", "1", Field.Store.NO));
        maxDoc.add(new LongField("value", Short.MAX_VALUE, Field.Store.NO));
        writer.addDocument(maxDoc);

        final Document minDoc = new Document();
        minDoc.add(new StringField("_id", "2", Field.Store.NO));
        minDoc.add(new LongField("value", Short.MIN_VALUE, Field.Store.NO));
        writer.addDocument(minDoc);

        // NOTE(review): requests "long" field data although getFieldDataType() of this
        // class is "short" - confirm this is intentional.
        final FieldMapper.Names fieldNames = new FieldMapper.Names("value");
        final IndexNumericFieldData numericFieldData = ifdService.getForField(fieldNames, new FieldDataType("long"));
        final AtomicNumericFieldData loaded = numericFieldData.load(refreshReader());

        assertThat(loaded, instanceOf(ShortArrayAtomicFieldData.class));
        final long expectedMax = Short.MAX_VALUE;
        final long expectedMin = Short.MIN_VALUE;
        assertThat(loaded.getLongValues().getValue(0), equalTo(expectedMax));
        assertThat(loaded.getLongValues().getValue(1), equalTo(expectedMin));
    }
}

View File

@ -24,9 +24,9 @@ import org.apache.lucene.document.IntField;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.fieldcomparator.ByteValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.plain.ByteArrayIndexFieldData;
/**
*/
@ -39,7 +39,7 @@ public class ByteNestedSortingTests extends AbstractNumberNestedSortingTests {
/**
 * Creates the comparator source used to sort on {@code fieldName}: looks up the field data
 * via {@code getForField} and wraps it in a {@link ByteValuesComparatorSource} together
 * with the supplied missing value and sort mode.
 */
@Override
protected IndexFieldData.XFieldComparatorSource createInnerFieldComparator(String fieldName, SortMode sortMode, Object missingValue) {
ByteArrayIndexFieldData fieldData = getForField(fieldName);
IndexNumericFieldData fieldData = getForField(fieldName);
return new ByteValuesComparatorSource(fieldData, missingValue, sortMode);
}

View File

@ -24,9 +24,9 @@ import org.apache.lucene.document.IntField;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.fieldcomparator.IntValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.plain.IntArrayIndexFieldData;
/**
*/
@ -39,7 +39,7 @@ public class IntegerNestedSortingTests extends AbstractNumberNestedSortingTests
/**
 * Creates the comparator source used to sort on {@code fieldName}: looks up the field data
 * via {@code getForField} and wraps it in an {@link IntValuesComparatorSource} together
 * with the supplied missing value and sort mode.
 */
@Override
protected IndexFieldData.XFieldComparatorSource createInnerFieldComparator(String fieldName, SortMode sortMode, Object missingValue) {
IntArrayIndexFieldData fieldData = getForField(fieldName);
IndexNumericFieldData fieldData = getForField(fieldName);
return new IntValuesComparatorSource(fieldData, missingValue, sortMode);
}

View File

@ -26,7 +26,7 @@ import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.plain.LongArrayIndexFieldData;
import org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData;
/**
*/
@ -39,7 +39,7 @@ public class LongNestedSortingTests extends AbstractNumberNestedSortingTests {
/**
 * Creates the comparator source used to sort on {@code fieldName}: looks up the field data
 * via {@code getForField} and wraps it in a {@link LongValuesComparatorSource} together
 * with the supplied missing value and sort mode.
 */
@Override
protected IndexFieldData.XFieldComparatorSource createInnerFieldComparator(String fieldName, SortMode sortMode, Object missingValue) {
LongArrayIndexFieldData fieldData = getForField(fieldName);
// NOTE(review): the Byte/Integer/Short variants of this test declare this local as
// IndexNumericFieldData; consider the interface type here too instead of the concrete class.
PackedArrayIndexFieldData fieldData = getForField(fieldName);
return new LongValuesComparatorSource(fieldData, missingValue, sortMode);
}

View File

@ -24,9 +24,9 @@ import org.apache.lucene.document.IntField;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.fieldcomparator.ShortValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.plain.ShortArrayIndexFieldData;
/**
*/
@ -39,7 +39,7 @@ public class ShortNestedSortingTests extends AbstractNumberNestedSortingTests {
/**
 * Creates the comparator source used to sort on {@code fieldName}: looks up the field data
 * via {@code getForField} and wraps it in a {@link ShortValuesComparatorSource} together
 * with the supplied missing value and sort mode.
 */
@Override
protected IndexFieldData.XFieldComparatorSource createInnerFieldComparator(String fieldName, SortMode sortMode, Object missingValue) {
ShortArrayIndexFieldData fieldData = getForField(fieldName);
IndexNumericFieldData fieldData = getForField(fieldName);
return new ShortValuesComparatorSource(fieldData, missingValue, sortMode);
}