Merge integer field data implementations.

This commit merges the field data implementations for byte, short, int and long data into PackedArrayAtomicFieldData, which uses Lucene's PackedInts API to store data.

Close #3220
This commit is contained in:
parent 5a20ba5ff2
commit 2fb5d3ff51
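
Background for the change: Lucene's PackedInts API stores a sequence of non-negative integers with a fixed number of bits per value, so a field whose values fit in, say, 7 bits no longer pays for a full 64-bit slot per document. A minimal sketch of the API (illustrative only, not code from this commit):

    import org.apache.lucene.util.packed.PackedInts;

    // 1000 values, 7 bits each; the acceptable overhead ratio lets the
    // implementation round bits-per-value up in exchange for faster access.
    PackedInts.Mutable packed = PackedInts.getMutable(1000, 7, PackedInts.DEFAULT);
    packed.set(0, 42);       // store a value
    long v = packed.get(0);  // read it back: 42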
src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java

@@ -50,10 +50,10 @@ public class IndexFieldDataService extends AbstractIndexComponent {
                 .put("string", new PagedBytesIndexFieldData.Builder())
                 .put("float", new FloatArrayIndexFieldData.Builder())
                 .put("double", new DoubleArrayIndexFieldData.Builder())
-                .put("byte", new ByteArrayIndexFieldData.Builder())
-                .put("short", new ShortArrayIndexFieldData.Builder())
-                .put("int", new IntArrayIndexFieldData.Builder())
-                .put("long", new LongArrayIndexFieldData.Builder())
+                .put("byte", new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BYTE))
+                .put("short", new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.SHORT))
+                .put("int", new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.INT))
+                .put("long", new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.LONG))
                 .put("geo_point", new GeoPointDoubleArrayIndexFieldData.Builder())
                 .immutableMap();
@@ -62,10 +62,10 @@ public class IndexFieldDataService extends AbstractIndexComponent {
                 .put(Tuple.tuple("string", "fst"), new FSTBytesIndexFieldData.Builder())
                 .put(Tuple.tuple("float", "array"), new FloatArrayIndexFieldData.Builder())
                 .put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder())
-                .put(Tuple.tuple("byte", "array"), new ByteArrayIndexFieldData.Builder())
-                .put(Tuple.tuple("short", "array"), new ShortArrayIndexFieldData.Builder())
-                .put(Tuple.tuple("int", "array"), new IntArrayIndexFieldData.Builder())
-                .put(Tuple.tuple("long", "array"), new LongArrayIndexFieldData.Builder())
+                .put(Tuple.tuple("byte", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.BYTE))
+                .put(Tuple.tuple("short", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.SHORT))
+                .put(Tuple.tuple("int", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.INT))
+                .put(Tuple.tuple("long", "array"), new PackedArrayIndexFieldData.Builder().setNumericType(IndexNumericFieldData.NumericType.LONG))
                 .put(Tuple.tuple("geo_point", "array"), new GeoPointDoubleArrayIndexFieldData.Builder())
                 .immutableMap();
     }
src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java

@@ -20,6 +20,8 @@
 package org.elasticsearch.index.fielddata;
 
 import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.TermsEnum;
+import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
 
 /**
  */
@@ -94,6 +96,13 @@ public interface IndexNumericFieldData<FD extends AtomicNumericFieldData> extend
 
         public abstract boolean isFloatingPoint();
 
+        public abstract int requiredBits();
+
+        public final TermsEnum wrapTermsEnum(TermsEnum termsEnum) {
+            if (requiredBits() > 32) {
+                return OrdinalsBuilder.wrapNumeric64Bit(termsEnum);
+            } else {
+                return OrdinalsBuilder.wrapNumeric32Bit(termsEnum);
+            }
+        }
     }
 
     NumericType getNumericType();
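
The new wrapTermsEnum method lets callers pick the right prefix-coded wrapper without knowing the concrete numeric type: byte, short and int terms are indexed as 32-bit prefix-coded values, long terms as 64-bit, and requiredBits() encodes that difference. A hedged usage sketch (the surrounding variables are assumed, not from this diff):

    // 'terms' is an org.apache.lucene.index.Terms instance for a numeric field.
    TermsEnum raw = terms.iterator(null);
    // SHORT requires 16 bits, so this takes the 32-bit branch internally.
    TermsEnum numeric = IndexNumericFieldData.NumericType.SHORT.wrapTermsEnum(raw);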
src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java

@@ -36,7 +36,7 @@ import java.util.Comparator;
 
 /**
  * Simple class to build document ID <-> ordinal mapping. Note: Ordinals are
- * <tt>1</tt> based monotocially increasing positive integers. <tt>0</tt>
+ * <tt>1</tt> based monotonically increasing positive integers. <tt>0</tt>
  * donates the missing value in this context.
  */
 public final class OrdinalsBuilder implements Closeable {
@@ -55,10 +55,8 @@ public final class OrdinalsBuilder implements Closeable {
     private int numMultiValuedDocs = 0;
     private int totalNumOrds = 0;
 
-    public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, Allocator allocator) throws IOException {
+    public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, Allocator allocator, float acceptableOverheadRatio) throws IOException {
         this.maxDoc = maxDoc;
-        // TODO: Make configurable...
-        float acceptableOverheadRatio = PackedInts.FAST;
         if (preDefineBitsRequired) {
             int numTerms = (int) terms.size();
             if (numTerms == -1) {
@@ -75,15 +73,15 @@ public final class OrdinalsBuilder implements Closeable {
     }
 
     public OrdinalsBuilder(int maxDoc) throws IOException {
-        this(null, false, maxDoc);
+        this(null, false, maxDoc, PackedInts.DEFAULT);
     }
 
-    public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc) throws IOException {
-        this(terms, preDefineBitsRequired, maxDoc, new DirectAllocator());
+    public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, float acceptableOverheadRatio) throws IOException {
+        this(terms, preDefineBitsRequired, maxDoc, new DirectAllocator(), acceptableOverheadRatio);
     }
 
-    public OrdinalsBuilder(Terms terms, int maxDoc) throws IOException {
-        this(terms, true, maxDoc, new DirectAllocator());
+    public OrdinalsBuilder(Terms terms, int maxDoc, float acceptableOverheadRatio) throws IOException {
+        this(terms, true, maxDoc, new DirectAllocator(), acceptableOverheadRatio);
     }
 
     /**
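
acceptableOverheadRatio is the standard PackedInts memory/speed knob: 0 means use exactly the minimal number of bits per value, while larger ratios (for example PackedInts.FAST) permit padding to faster, machine-friendly widths. The constructors above thread it through instead of the previously hard-coded PackedInts.FAST. A sketch of the new call shape (the wrapper method name is assumed for illustration):

    import java.io.IOException;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.util.packed.PackedInts;
    import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;

    OrdinalsBuilder newBuilder(Terms terms, int maxDoc) throws IOException {
        // Callers that prefer speed over memory can still pass PackedInts.FAST.
        return new OrdinalsBuilder(terms, maxDoc, PackedInts.FAST);
    }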
@@ -296,7 +294,7 @@ public final class OrdinalsBuilder implements Closeable {
      * A {@link TermsEnum} that iterates only full precision prefix coded 64 bit values.
      * @see #buildFromTerms(TermsEnum, Bits)
      */
-    public TermsEnum wrapNumeric64Bit(TermsEnum termsEnum) {
+    public static TermsEnum wrapNumeric64Bit(TermsEnum termsEnum) {
         return new FilteredTermsEnum(termsEnum, false) {
             @Override
             protected AcceptStatus accept(BytesRef term) throws IOException {
@@ -310,7 +308,7 @@ public final class OrdinalsBuilder implements Closeable {
      * A {@link TermsEnum} that iterates only full precision prefix coded 32 bit values.
      * @see #buildFromTerms(TermsEnum, Bits)
      */
-    public TermsEnum wrapNumeric32Bit(TermsEnum termsEnum) {
+    public static TermsEnum wrapNumeric32Bit(TermsEnum termsEnum) {
         return new FilteredTermsEnum(termsEnum, false) {
 
             @Override
src/main/java/org/elasticsearch/index/fielddata/plain/ByteArrayAtomicFieldData.java (deleted)

@@ -1,329 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.plain;
-
-import org.apache.lucene.util.FixedBitSet;
-import org.elasticsearch.common.RamUsage;
-import org.elasticsearch.index.fielddata.*;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals;
-
-/**
- */
-public abstract class ByteArrayAtomicFieldData extends AtomicNumericFieldData {
-
-    public static final ByteArrayAtomicFieldData EMPTY = new Empty();
-
-    protected final byte[] values;
-    private final int numDocs;
-
-    protected long size = -1;
-
-    public ByteArrayAtomicFieldData(byte[] values, int numDocs) {
-        super(false);
-        this.values = values;
-        this.numDocs = numDocs;
-    }
-
-    @Override
-    public int getNumDocs() {
-        return numDocs;
-    }
-
-    @Override
-    public void close() {
-    }
-
-    static class Empty extends ByteArrayAtomicFieldData {
-
-        Empty() {
-            super(null, 0);
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return LongValues.EMPTY;
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return DoubleValues.EMPTY;
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return false;
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return false;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            return 0;
-        }
-
-        @Override
-        public BytesValues getBytesValues() {
-            return BytesValues.EMPTY;
-        }
-
-        @Override
-        public ScriptDocValues getScriptValues() {
-            return ScriptDocValues.EMPTY;
-        }
-    }
-
-    public static class WithOrdinals extends ByteArrayAtomicFieldData {
-
-        private final Ordinals ordinals;
-
-        public WithOrdinals(byte[] values, int numDocs, Ordinals ordinals) {
-            super(values, numDocs);
-            this.ordinals = ordinals;
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return ordinals.isMultiValued();
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return true;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            if (size == -1) {
-                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length) + ordinals.getMemorySizeInBytes();
-            }
-            return size;
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return new LongValues(values, ordinals.ordinals());
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return new DoubleValues(values, ordinals.ordinals());
-        }
-
-        static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {
-
-            private final byte[] values;
-
-            LongValues(byte[] values, Ordinals.Docs ordinals) {
-                super(ordinals);
-                this.values = values;
-            }
-
-            @Override
-            public long getValueByOrd(int ord) {
-                return (long) values[ord];
-            }
-        }
-
-        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {
-
-            private final byte[] values;
-
-            DoubleValues(byte[] values, Ordinals.Docs ordinals) {
-                super(ordinals);
-                this.values = values;
-            }
-
-            @Override
-            public double getValueByOrd(int ord) {
-                return (double) values[ord];
-            }
-        }
-    }
-
-    /**
-     * A single valued case, where not all values are "set", so we have a FixedBitSet that
-     * indicates which values have an actual value.
-     */
-    public static class SingleFixedSet extends ByteArrayAtomicFieldData {
-
-        private final FixedBitSet set;
-
-        public SingleFixedSet(byte[] values, int numDocs, FixedBitSet set) {
-            super(values, numDocs);
-            this.set = set;
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return false;
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return false;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
-            }
-            return size;
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return new LongValues(values, set);
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return new DoubleValues(values, set);
-        }
-
-        static class LongValues extends org.elasticsearch.index.fielddata.LongValues {
-
-            private final byte[] values;
-            private final FixedBitSet set;
-
-            LongValues(byte[] values, FixedBitSet set) {
-                super(false);
-                this.values = values;
-                this.set = set;
-            }
-
-            @Override
-            public boolean hasValue(int docId) {
-                return set.get(docId);
-            }
-
-            @Override
-            public long getValue(int docId) {
-                return (long) values[docId];
-            }
-        }
-
-        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {
-
-            private final byte[] values;
-            private final FixedBitSet set;
-
-            DoubleValues(byte[] values, FixedBitSet set) {
-                super(false);
-                this.values = values;
-                this.set = set;
-            }
-
-            @Override
-            public boolean hasValue(int docId) {
-                return set.get(docId);
-            }
-
-            @Override
-            public double getValue(int docId) {
-                return (double) values[docId];
-            }
-        }
-    }
-
-    /**
-     * Assumes all the values are "set", and docId is used as the index to the value array.
-     */
-    public static class Single extends ByteArrayAtomicFieldData {
-
-        /**
-         * Note, here, we assume that there is no offset by 1 from docId, so position 0
-         * is the value for docId 0.
-         */
-        public Single(byte[] values, int numDocs) {
-            super(values, numDocs);
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return false;
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return false;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length);
-            }
-            return size;
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return new LongValues(values);
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return new DoubleValues(values);
-        }
-
-        static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {
-
-            private final byte[] values;
-
-            LongValues(byte[] values) {
-                super(false);
-                this.values = values;
-            }
-
-            @Override
-            public long getValue(int docId) {
-                return (long) values[docId];
-            }
-        }
-
-        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {
-
-            private final byte[] values;
-
-            DoubleValues(byte[] values) {
-                super(false);
-                this.values = values;
-            }
-
-            @Override
-            public double getValue(int docId) {
-                return (double) values[docId];
-            }
-        }
-    }
-}
src/main/java/org/elasticsearch/index/fielddata/plain/ByteArrayIndexFieldData.java (deleted)

@@ -1,148 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.plain;
-
-import gnu.trove.list.array.TByteArrayList;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefIterator;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.NumericUtils;
-import org.elasticsearch.ElasticSearchException;
-import org.elasticsearch.common.Nullable;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.index.Index;
-import org.elasticsearch.index.fielddata.*;
-import org.elasticsearch.index.fielddata.fieldcomparator.ByteValuesComparatorSource;
-import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
-import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
-import org.elasticsearch.index.mapper.FieldMapper;
-import org.elasticsearch.index.settings.IndexSettings;
-
-/**
- */
-public class ByteArrayIndexFieldData extends AbstractIndexFieldData<ByteArrayAtomicFieldData> implements IndexNumericFieldData<ByteArrayAtomicFieldData> {
-
-    public static class Builder implements IndexFieldData.Builder {
-
-        @Override
-        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
-            return new ByteArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
-        }
-    }
-
-    public ByteArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
-        super(index, indexSettings, fieldNames, fieldDataType, cache);
-    }
-
-    @Override
-    public NumericType getNumericType() {
-        return NumericType.BYTE;
-    }
-
-    @Override
-    public boolean valuesOrdered() {
-        // because we might have single values? we can dynamically update a flag to reflect that
-        // based on the atomic field data loaded
-        return false;
-    }
-
-    @Override
-    public ByteArrayAtomicFieldData load(AtomicReaderContext context) {
-        try {
-            return cache.load(context, this);
-        } catch (Throwable e) {
-            if (e instanceof ElasticSearchException) {
-                throw (ElasticSearchException) e;
-            } else {
-                throw new ElasticSearchException(e.getMessage(), e);
-            }
-        }
-    }
-
-    @Override
-    public ByteArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
-        AtomicReader reader = context.reader();
-        Terms terms = reader.terms(getFieldNames().indexName());
-        if (terms == null) {
-            return ByteArrayAtomicFieldData.EMPTY;
-        }
-        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
-        final TByteArrayList values = new TByteArrayList();
-
-        values.add((byte) 0); // first "t" indicates null value
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
-        BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
-        BytesRef term;
-        while ((term = iter.next()) != null) {
-            values.add((byte) NumericUtils.prefixCodedToInt(term));
-        }
-        try {
-            Ordinals build = builder.build(fieldDataType.getSettings());
-            return build(reader, fieldDataType, builder, build, new BuilderBytes() {
-                @Override
-                public byte get(int index) {
-                    return values.get(index);
-                }
-
-                @Override
-                public byte[] toArray() {
-                    return values.toArray();
-                }
-            });
-        } finally {
-            builder.close();
-        }
-    }
-
-    static interface BuilderBytes {
-        byte get(int index);
-
-        byte[] toArray();
-    }
-
-    static ByteArrayAtomicFieldData build(AtomicReader reader, FieldDataType fieldDataType, OrdinalsBuilder builder, Ordinals build, BuilderBytes values) {
-        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
-            Docs ordinals = build.ordinals();
-            byte[] sValues = new byte[reader.maxDoc()];
-            int maxDoc = reader.maxDoc();
-            for (int i = 0; i < maxDoc; i++) {
-                sValues[i] = values.get(ordinals.getOrd(i));
-            }
-            final FixedBitSet set = builder.buildDocsWithValuesSet();
-            if (set == null) {
-                return new ByteArrayAtomicFieldData.Single(sValues, reader.maxDoc());
-            } else {
-                return new ByteArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
-            }
-        } else {
-            return new ByteArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
-        }
-    }
-
-    @Override
-    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
-        return new ByteValuesComparatorSource(this, missingValue, sortMode);
-    }
-}
src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java

@@ -27,6 +27,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.RamUsage;
@@ -94,9 +95,10 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
         final TDoubleArrayList values = new TDoubleArrayList();
 
         values.add(0); // first "t" indicates null value
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
+        final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
+        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
         try {
-            final BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric64Bit(terms.iterator(null)), reader.getLiveDocs());
+            final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
             BytesRef term;
             while ((term = iter.next()) != null) {
                 values.add(NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term)));
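
The ratio is read from the field data settings with a default, so existing mappings silently keep PackedInts.DEFAULT. A sketch of the Settings lookup semantics (the 0.5f value is a made-up example):

    import org.apache.lucene.util.packed.PackedInts;
    import org.elasticsearch.common.settings.ImmutableSettings;
    import org.elasticsearch.common.settings.Settings;

    Settings s = ImmutableSettings.settingsBuilder()
            .put("acceptable_overhead_ratio", 0.5f)
            .build();
    // Returns 0.5f here; returns PackedInts.DEFAULT when the key is absent.
    float ratio = s.getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);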
src/main/java/org/elasticsearch/index/fielddata/plain/FSTBytesIndexFieldData.java

@@ -26,6 +26,7 @@ import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.INPUT_TYPE;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.fielddata.FieldDataType;
@@ -65,7 +66,8 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
         final IntsRef scratch = new IntsRef();
 
         boolean preDefineBitsRequired = regex == null && frequency == null;
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc());
+        final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
+        OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio);
         try {
 
             // we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support
src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayIndexFieldData.java

@@ -27,6 +27,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.RamUsage;
@@ -94,9 +95,10 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
 
         values.add(0); // first "t" indicates null value
 
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
+        final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
+        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
         try {
-            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
+            BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
             BytesRef term;
             while ((term = iter.next()) != null) {
                 values.add(NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term)));
src/main/java/org/elasticsearch/index/fielddata/plain/GeoPointDoubleArrayIndexFieldData.java

@@ -24,6 +24,7 @@ import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.*;
+import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.ElasticSearchIllegalArgumentException;
 import org.elasticsearch.common.Nullable;
@@ -86,7 +87,8 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
         final TDoubleArrayList lon = new TDoubleArrayList();
         lat.add(0); // first "t" indicates null value
         lon.add(0); // first "t" indicates null value
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
+        final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
+        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
         final CharsRef spare = new CharsRef();
         try {
             BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null), reader.getLiveDocs());
src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayAtomicFieldData.java (deleted)

@@ -1,326 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.plain;
-
-import org.apache.lucene.util.FixedBitSet;
-import org.elasticsearch.common.RamUsage;
-import org.elasticsearch.index.fielddata.*;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals;
-
-/**
- */
-public abstract class IntArrayAtomicFieldData extends AtomicNumericFieldData {
-
-    public static final IntArrayAtomicFieldData EMPTY = new Empty();
-
-    protected final int[] values;
-    private final int numDocs;
-
-    protected long size = -1;
-
-    public IntArrayAtomicFieldData(int[] values, int numDocs) {
-        super(false);
-        this.values = values;
-        this.numDocs = numDocs;
-    }
-
-    @Override
-    public void close() {
-    }
-
-    @Override
-    public int getNumDocs() {
-        return numDocs;
-    }
-
-    static class Empty extends IntArrayAtomicFieldData {
-
-        Empty() {
-            super(null, 0);
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return LongValues.EMPTY;
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return DoubleValues.EMPTY;
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return false;
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return false;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            return 0;
-        }
-
-        @Override
-        public BytesValues getBytesValues() {
-            return BytesValues.EMPTY;
-        }
-
-        @Override
-        public ScriptDocValues getScriptValues() {
-            return ScriptDocValues.EMPTY;
-        }
-    }
-
-    public static class WithOrdinals extends IntArrayAtomicFieldData {
-
-        private final Ordinals ordinals;
-
-        public WithOrdinals(int[] values, int numDocs, Ordinals ordinals) {
-            super(values, numDocs);
-            this.ordinals = ordinals;
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return ordinals.isMultiValued();
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return true;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            if (size == -1) {
-                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + +RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_INT) + ordinals.getMemorySizeInBytes();
-            }
-            return size;
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return new LongValues(values, ordinals.ordinals());
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return new DoubleValues(values, ordinals.ordinals());
-        }
-
-        static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {
-
-            private final int[] values;
-
-            LongValues(int[] values, Ordinals.Docs ordinals) {
-                super(ordinals);
-                this.values = values;
-            }
-
-            @Override
-            public long getValueByOrd(int ord) {
-                return (long) values[ord];
-            }
-        }
-
-        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {
-
-            private final int[] values;
-
-            DoubleValues(int[] values, Ordinals.Docs ordinals) {
-                super(ordinals);
-                this.values = values;
-            }
-
-            @Override
-            public double getValueByOrd(int ord) {
-                return values[ord];
-            }
-        }
-    }
-
-    /**
-     * A single valued case, where not all values are "set", so we have a FixedBitSet that
-     * indicates which values have an actual value.
-     */
-    public static class SingleFixedSet extends IntArrayAtomicFieldData {
-
-        private final FixedBitSet set;
-
-        public SingleFixedSet(int[] values, int numDocs, FixedBitSet set) {
-            super(values, numDocs);
-            this.set = set;
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return false;
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return false;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_INT) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
-            }
-            return size;
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return new LongValues(values, set);
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return new DoubleValues(values, set);
-        }
-
-        static class LongValues extends org.elasticsearch.index.fielddata.LongValues {
-
-            private final int[] values;
-            private final FixedBitSet set;
-
-            LongValues(int[] values, FixedBitSet set) {
-                super(false);
-                this.values = values;
-                this.set = set;
-            }
-
-            @Override
-            public boolean hasValue(int docId) {
-                return set.get(docId);
-            }
-
-            @Override
-            public long getValue(int docId) {
-                return (long) values[docId];
-            }
-        }
-
-        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {
-
-            private final int[] values;
-            private final FixedBitSet set;
-
-            DoubleValues(int[] values, FixedBitSet set) {
-                super(false);
-                this.values = values;
-                this.set = set;
-            }
-
-            @Override
-            public boolean hasValue(int docId) {
-                return set.get(docId);
-            }
-
-            @Override
-            public double getValue(int docId) {
-                return (double) values[docId];
-            }
-        }
-    }
-
-    /**
-     * Assumes all the values are "set", and docId is used as the index to the value array.
-     */
-    public static class Single extends IntArrayAtomicFieldData {
-
-        /**
-         * Note, here, we assume that there is no offset by 1 from docId, so position 0
-         * is the value for docId 0.
-         */
-        public Single(int[] values, int numDocs) {
-            super(values, numDocs);
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return false;
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return false;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_INT);
-            }
-            return size;
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return new LongValues(values);
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return new DoubleValues(values);
-        }
-
-        static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {
-
-            private final int[] values;
-
-            LongValues(int[] values) {
-                super(false);
-                assert values.length != 0;
-                this.values = values;
-            }
-
-            @Override
-            public long getValue(int docId) {
-                return (long) values[docId];
-            }
-        }
-
-        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {
-
-            private final int[] values;
-
-            DoubleValues(int[] values) {
-                super(false);
-                this.values = values;
-            }
-
-            @Override
-            public double getValue(int docId) {
-                return values[docId];
-            }
-        }
-    }
-}
src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayIndexFieldData.java (deleted)

@@ -1,223 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.plain;
-
-import gnu.trove.iterator.TIntIterator;
-import gnu.trove.list.array.TIntArrayList;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefIterator;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.NumericUtils;
-import org.elasticsearch.ElasticSearchException;
-import org.elasticsearch.common.Nullable;
-import org.elasticsearch.common.RamUsage;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.index.Index;
-import org.elasticsearch.index.fielddata.*;
-import org.elasticsearch.index.fielddata.fieldcomparator.IntValuesComparatorSource;
-import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
-import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
-import org.elasticsearch.index.mapper.FieldMapper;
-import org.elasticsearch.index.settings.IndexSettings;
-
-/**
- */
-public class IntArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
-
-    public static class Builder implements IndexFieldData.Builder {
-
-        @Override
-        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
-            return new IntArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
-        }
-    }
-
-    public IntArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
-        super(index, indexSettings, fieldNames, fieldDataType, cache);
-    }
-
-    @Override
-    public NumericType getNumericType() {
-        return NumericType.INT;
-    }
-
-    @Override
-    public boolean valuesOrdered() {
-        // because we might have single values? we can dynamically update a flag to reflect that
-        // based on the atomic field data loaded
-        return false;
-    }
-
-    @Override
-    public AtomicNumericFieldData load(AtomicReaderContext context) {
-        try {
-            return cache.load(context, this);
-        } catch (Throwable e) {
-            if (e instanceof ElasticSearchException) {
-                throw (ElasticSearchException) e;
-            } else {
-                throw new ElasticSearchException(e.getMessage(), e);
-            }
-        }
-    }
-
-    @Override
-    public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
-        AtomicReader reader = context.reader();
-        Terms terms = reader.terms(getFieldNames().indexName());
-        if (terms == null) {
-            return IntArrayAtomicFieldData.EMPTY;
-        }
-        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
-        final TIntArrayList values = new TIntArrayList();
-
-        values.add(0); // first "t" indicates null value
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
-        try {
-            BytesRef term;
-            int max = Integer.MIN_VALUE;
-            int min = Integer.MAX_VALUE;
-            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
-            while ((term = iter.next()) != null) {
-                int value = NumericUtils.prefixCodedToInt(term);
-                values.add(value);
-                if (value > max) {
-                    max = value;
-                }
-                if (value < min) {
-                    min = value;
-                }
-            }
-
-            Ordinals build = builder.build(fieldDataType.getSettings());
-            if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
-                // if we can fit all our values in a byte or short we should do this!
-                if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
-                    return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
-                        @Override
-                        public byte get(int index) {
-                            return (byte) values.get(index);
-                        }
-
-                        @Override
-                        public byte[] toArray() {
-                            byte[] bValues = new byte[values.size()];
-                            int i = 0;
-                            for (TIntIterator it = values.iterator(); it.hasNext(); ) {
-                                bValues[i++] = (byte) it.next();
-                            }
-                            return bValues;
-                        }
-                    });
-                } else if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
-                    return ShortArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ShortArrayIndexFieldData.BuilderShorts() {
-                        @Override
-                        public short get(int index) {
-                            return (short) values.get(index);
-                        }
-
-                        @Override
-                        public short[] toArray() {
-                            short[] sValues = new short[values.size()];
-                            int i = 0;
-                            for (TIntIterator it = values.iterator(); it.hasNext(); ) {
-                                sValues[i++] = (short) it.next();
-                            }
-                            return sValues;
-                        }
-
-                        @Override
-                        public int size() {
-                            return values.size();
-                        }
-                    });
-                }
-            }
-
-            return build(reader, fieldDataType, builder, build, new BuilderIntegers() {
-                @Override
-                public int get(int index) {
-                    return values.get(index);
-                }
-
-                @Override
-                public int[] toArray() {
-                    return values.toArray();
-                }
-
-                @Override
-                public int size() {
-                    return values.size();
-                }
-            });
-        } finally {
-            builder.close();
-        }
-    }
-
-    static interface BuilderIntegers {
-        int get(int index);
-
-        int[] toArray();
-
-        int size();
-    }
-
-    static IntArrayAtomicFieldData build(AtomicReader reader, FieldDataType fieldDataType, OrdinalsBuilder builder, Ordinals build, BuilderIntegers values) {
-        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
-            Docs ordinals = build.ordinals();
-            final FixedBitSet set = builder.buildDocsWithValuesSet();
-
-            // there's sweatspot where due to low unique value count, using ordinals will consume less memory
-            long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_INT + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
-            long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_INT;
-            long ordinalsSize = build.getMemorySizeInBytes();
-            if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
-                return new IntArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
-            }
-
-            int[] sValues = new int[reader.maxDoc()];
-            int maxDoc = reader.maxDoc();
-            for (int i = 0; i < maxDoc; i++) {
-                sValues[i] = values.get(ordinals.getOrd(i));
-            }
-            if (set == null) {
-                return new IntArrayAtomicFieldData.Single(sValues, reader.maxDoc());
-            } else {
-                return new IntArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
-            }
-        } else {
-            return new IntArrayAtomicFieldData.WithOrdinals(
-                    values.toArray(),
-                    reader.maxDoc(),
-                    build);
-        }
-    }
-
-    @Override
-    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
-        return new IntValuesComparatorSource(this, missingValue, sortMode);
-    }
-}
src/main/java/org/elasticsearch/index/fielddata/plain/LongArrayIndexFieldData.java (deleted)

@@ -1,217 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.plain;
-
-import gnu.trove.iterator.TLongIterator;
-import gnu.trove.list.array.TLongArrayList;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefIterator;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.NumericUtils;
-import org.elasticsearch.ElasticSearchException;
-import org.elasticsearch.common.Nullable;
-import org.elasticsearch.common.RamUsage;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.index.Index;
-import org.elasticsearch.index.fielddata.*;
-import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
-import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
-import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
-import org.elasticsearch.index.mapper.FieldMapper;
-import org.elasticsearch.index.settings.IndexSettings;
-
-/**
- */
-public class LongArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
-
-    public static class Builder implements IndexFieldData.Builder {
-
-        @Override
-        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
-            return new LongArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
-        }
-    }
-
-    public LongArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
-        super(index, indexSettings, fieldNames, fieldDataType, cache);
-    }
-
-    @Override
-    public NumericType getNumericType() {
-        return NumericType.LONG;
-    }
-
-    @Override
-    public boolean valuesOrdered() {
-        // because we might have single values? we can dynamically update a flag to reflect that
-        // based on the atomic field data loaded
-        return false;
-    }
-
-    @Override
-    public AtomicNumericFieldData load(AtomicReaderContext context) {
-        try {
-            return cache.load(context, this);
-        } catch (Throwable e) {
-            if (e instanceof ElasticSearchException) {
-                throw (ElasticSearchException) e;
-            } else {
-                throw new ElasticSearchException(e.getMessage(), e);
-            }
-        }
-    }
-
-    @Override
-    public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
-        AtomicReader reader = context.reader();
-        Terms terms = reader.terms(getFieldNames().indexName());
-        if (terms == null) {
-            return LongArrayAtomicFieldData.EMPTY;
-        }
-        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
-        final TLongArrayList values = new TLongArrayList();
-
-        values.add(0); // first "t" indicates null value
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
-        try {
-            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric64Bit(terms.iterator(null)), reader.getLiveDocs());
-            BytesRef term;
-            long max = Long.MIN_VALUE;
-            long min = Long.MAX_VALUE;
-            while ((term = iter.next()) != null) {
-                long value = NumericUtils.prefixCodedToLong(term);
-                values.add(value);
-                if (value > max) {
-                    max = value;
-                }
-                if (value < min) {
-                    min = value;
-                }
-            }
-            Ordinals build = builder.build(fieldDataType.getSettings());
-
-            if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
-                // since the default mapping for numeric is long, its worth optimizing the actual type used to represent the data
-                if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
-                    return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
-                        @Override
-                        public byte get(int index) {
-                            return (byte) values.get(index);
-                        }
-
-                        @Override
-                        public byte[] toArray() {
-                            byte[] bValues = new byte[values.size()];
-                            int i = 0;
-                            for (TLongIterator it = values.iterator(); it.hasNext(); ) {
-                                bValues[i++] = (byte) it.next();
-                            }
-                            return bValues;
-                        }
-                    });
-                } else if (min >= Short.MIN_VALUE && max <= Short.MAX_VALUE) {
-                    return ShortArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ShortArrayIndexFieldData.BuilderShorts() {
-                        @Override
-                        public short get(int index) {
-                            return (short) values.get(index);
-                        }
-
-                        @Override
-                        public short[] toArray() {
-                            short[] sValues = new short[values.size()];
-                            int i = 0;
-                            for (TLongIterator it = values.iterator(); it.hasNext(); ) {
-                                sValues[i++] = (short) it.next();
-                            }
-                            return sValues;
-                        }
-
-                        @Override
-                        public int size() {
-                            return values.size();
-                        }
-                    });
-                } else if (min >= Integer.MIN_VALUE && max <= Integer.MAX_VALUE) {
-                    return IntArrayIndexFieldData.build(reader, fieldDataType, builder, build, new IntArrayIndexFieldData.BuilderIntegers() {
-                        @Override
-                        public int get(int index) {
-                            return (int) values.get(index);
-                        }
-
-                        @Override
-                        public int[] toArray() {
-                            int[] iValues = new int[values.size()];
-                            int i = 0;
-                            for (TLongIterator it = values.iterator(); it.hasNext(); ) {
-                                iValues[i++] = (int) it.next();
-                            }
-                            return iValues;
-                        }
-
-                        @Override
-                        public int size() {
-                            return values.size();
-                        }
-
-                    });
-                }
-            }
-
-            if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
-                Docs ordinals = build.ordinals();
-                final FixedBitSet set = builder.buildDocsWithValuesSet();
-
-                // there's sweatspot where due to low unique value count, using ordinals will consume less memory
-                long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_LONG + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
-                long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_LONG;
-                long ordinalsSize = build.getMemorySizeInBytes();
-                if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
-                    return new LongArrayAtomicFieldData.WithOrdinals(values.toArray(new long[values.size()]), reader.maxDoc(), build);
-                }
-
-                long[] sValues = new long[reader.maxDoc()];
-                int maxDoc = reader.maxDoc();
-                for (int i = 0; i < maxDoc; i++) {
-                    sValues[i] = values.get(ordinals.getOrd(i));
-                }
-                if (set == null) {
-                    return new LongArrayAtomicFieldData.Single(sValues, reader.maxDoc());
-                } else {
-                    return new LongArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
-                }
-            } else {
-                return new LongArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
-            }
-        } finally {
-            builder.close();
-        }
-
-    }
-
-    @Override
-    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
-        return new LongValuesComparatorSource(this, missingValue, sortMode);
-    }
-}
src/main/java/org/elasticsearch/index/fielddata/plain/LongArrayAtomicFieldData.java → src/main/java/org/elasticsearch/index/fielddata/plain/PackedArrayAtomicFieldData.java

@@ -19,25 +19,25 @@
 
 package org.elasticsearch.index.fielddata.plain;
 
-import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.common.RamUsage;
 import org.elasticsearch.index.fielddata.*;
 import org.elasticsearch.index.fielddata.ordinals.Ordinals;
 
 /**
+ * {@link AtomicNumericFieldData} implementation which stores data in packed arrays to save memory.
  */
-public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
+public abstract class PackedArrayAtomicFieldData extends AtomicNumericFieldData {
 
-    public static final LongArrayAtomicFieldData EMPTY = new Empty();
+    public static final PackedArrayAtomicFieldData EMPTY = new Empty();
 
-    protected final long[] values;
     private final int numDocs;
 
     protected long size = -1;
 
-    public LongArrayAtomicFieldData(long[] values, int numDocs) {
+    public PackedArrayAtomicFieldData(int numDocs) {
         super(false);
-        this.values = values;
         this.numDocs = numDocs;
     }
@@ -50,10 +50,10 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
         return numDocs;
     }
 
-    static class Empty extends LongArrayAtomicFieldData {
+    static class Empty extends PackedArrayAtomicFieldData {
 
         Empty() {
-            super(null, 0);
+            super(0);
         }
 
         @Override
@@ -92,12 +92,14 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
         }
     }
 
-    public static class WithOrdinals extends LongArrayAtomicFieldData {
+    public static class WithOrdinals extends PackedArrayAtomicFieldData {
 
+        private final MonotonicAppendingLongBuffer values;
         private final Ordinals ordinals;
 
-        public WithOrdinals(long[] values, int numDocs, Ordinals ordinals) {
-            super(values, numDocs);
+        public WithOrdinals(MonotonicAppendingLongBuffer values, int numDocs, Ordinals ordinals) {
+            super(numDocs);
+            this.values = values;
             this.ordinals = ordinals;
         }
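
WithOrdinals now stores one long per ordinal in a MonotonicAppendingLongBuffer, which compresses a sequence of longs by packing deltas from a linear interpolation; per-ordinal values arrive in sorted order, which is the favorable case for it. Ordinal 0 is reserved for "missing", hence the values.get(ord - 1) lookups in the hunks below. An illustrative sketch (not code from this commit):

    import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

    MonotonicAppendingLongBuffer buf = new MonotonicAppendingLongBuffer();
    buf.add(3);   // value for ord 1
    buf.add(10);  // value for ord 2
    buf.add(11);  // value for ord 3
    int ord = 2;
    long value = ord == 0 ? 0L : buf.get(ord - 1); // -> 10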
@@ -114,7 +116,7 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
         @Override
         public long getMemorySizeInBytes() {
             if (size == -1) {
-                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + +RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_LONG) + ordinals.getMemorySizeInBytes();
+                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + values.ramBytesUsed() + ordinals.getMemorySizeInBytes();
             }
             return size;
         }
@@ -131,31 +133,31 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
 
         static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {
 
-            private final long[] values;
+            private final MonotonicAppendingLongBuffer values;
 
-            LongValues(long[] values, Ordinals.Docs ordinals) {
+            LongValues(MonotonicAppendingLongBuffer values, Ordinals.Docs ordinals) {
                 super(ordinals);
                 this.values = values;
             }
 
             @Override
             public long getValueByOrd(int ord) {
-                return values[ord];
+                return ord == 0 ? 0L : values.get(ord - 1);
             }
         }
 
         static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {
 
-            private final long[] values;
+            private final MonotonicAppendingLongBuffer values;
 
-            DoubleValues(long[] values, Ordinals.Docs ordinals) {
+            DoubleValues(MonotonicAppendingLongBuffer values, Ordinals.Docs ordinals) {
                 super(ordinals);
                 this.values = values;
             }
 
             @Override
             public double getValueByOrd(int ord) {
-                return values[ord];
+                return ord == 0 ? 0L : values.get(ord - 1);
             }
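Aside: the ord == 0 ? 0L : values.get(ord - 1) pattern above reflects the 1-based ordinal convention of the ordinals builder: ordinal 0 is reserved for documents without a value, so real values live at buffer index ord - 1. A minimal illustration (class name and values are made up, not part of the commit), assuming Lucene 4.x's MonotonicAppendingLongBuffer API:

    import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

    public class OrdinalConventionSketch {
        public static void main(String[] args) {
            MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();
            values.add(7L);  // value for ordinal 1
            values.add(42L); // value for ordinal 2
            int ord = 0;     // a document with no value
            long v = ord == 0 ? 0L : values.get(ord - 1); // 0L stands in for "missing"
            assert v == 0L;
            assert values.get(2 - 1) == 42L; // ordinal 2 resolves to 42
        }
    }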
@@ -163,16 +165,20 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
     }
 
     /**
-     * A single valued case, where not all values are "set", so we have a FixedBitSet that
-     * indicates which values have an actual value.
+     * A single valued case, where not all values are "set", so we have a special
+     * value which encodes the fact that the document has no value.
      */
-    public static class SingleFixedSet extends LongArrayAtomicFieldData {
+    public static class SingleSparse extends PackedArrayAtomicFieldData {
 
-        private final FixedBitSet set;
+        private final PackedInts.Mutable values;
+        private final long minValue;
+        private final long missingValue;
 
-        public SingleFixedSet(long[] values, int numDocs, FixedBitSet set) {
-            super(values, numDocs);
-            this.set = set;
+        public SingleSparse(PackedInts.Mutable values, long minValue, int numDocs, long missingValue) {
+            super(numDocs);
+            this.values = values;
+            this.minValue = minValue;
+            this.missingValue = missingValue;
         }
 
         @Override
@@ -188,63 +194,69 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
         @Override
         public long getMemorySizeInBytes() {
             if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
+                size = values.ramBytesUsed() + 2 * RamUsage.NUM_BYTES_LONG;
             }
             return size;
         }
 
         @Override
         public LongValues getLongValues() {
-            return new LongValues(values, set);
+            return new LongValues(values, minValue, missingValue);
         }
 
         @Override
         public DoubleValues getDoubleValues() {
-            return new DoubleValues(values, set);
+            return new DoubleValues(values, minValue, missingValue);
         }
 
         static class LongValues extends org.elasticsearch.index.fielddata.LongValues {
 
-            private final long[] values;
-            private final FixedBitSet set;
+            private final PackedInts.Mutable values;
+            private final long minValue;
+            private final long missingValue;
 
-            LongValues(long[] values, FixedBitSet set) {
+            LongValues(PackedInts.Mutable values, long minValue, long missingValue) {
                 super(false);
                 this.values = values;
-                this.set = set;
+                this.minValue = minValue;
+                this.missingValue = missingValue;
             }
 
             @Override
             public boolean hasValue(int docId) {
-                return set.get(docId);
+                return values.get(docId) != missingValue;
             }
 
             @Override
             public long getValue(int docId) {
-                return values[docId];
+                final long value = values.get(docId);
+                return value == missingValue ? 0L : minValue + value;
             }
         }
 
         static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {
 
-            private final long[] values;
-            private final FixedBitSet set;
+            private final PackedInts.Mutable values;
+            private final long minValue;
+            private final long missingValue;
 
-            DoubleValues(long[] values, FixedBitSet set) {
+            DoubleValues(PackedInts.Mutable values, long minValue, long missingValue) {
                 super(false);
                 this.values = values;
-                this.set = set;
+                this.minValue = minValue;
+                this.missingValue = missingValue;
             }
 
             @Override
             public boolean hasValue(int docId) {
-                return set.get(docId);
+                return values.get(docId) != missingValue;
             }
 
             @Override
             public double getValue(int docId) {
-                return (double) values[docId];
+                final long value = values.get(docId);
+                return value == missingValue ? 0L : minValue + value;
             }
         }
     }
@@ -252,14 +264,19 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
     /**
     * Assumes all the values are "set", and docId is used as the index to the value array.
     */
-    public static class Single extends LongArrayAtomicFieldData {
+    public static class Single extends PackedArrayAtomicFieldData {
 
+        private final PackedInts.Mutable values;
+        private final long minValue;
+
         /**
         * Note, here, we assume that there is no offset by 1 from docId, so position 0
         * is the value for docId 0.
         */
-        public Single(long[] values, int numDocs) {
-            super(values, numDocs);
+        public Single(PackedInts.Mutable values, long minValue, int numDocs) {
+            super(numDocs);
+            this.values = values;
+            this.minValue = minValue;
         }
 
         @Override
@@ -275,49 +292,53 @@ public abstract class LongArrayAtomicFieldData extends AtomicNumericFieldData {
         @Override
         public long getMemorySizeInBytes() {
             if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_LONG);
+                size = values.ramBytesUsed();
             }
             return size;
         }
 
         @Override
         public LongValues getLongValues() {
-            return new LongValues(values);
+            return new LongValues(values, minValue);
         }
 
         @Override
         public DoubleValues getDoubleValues() {
-            return new DoubleValues(values);
+            return new DoubleValues(values, minValue);
        }
 
         static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {
 
-            private final long[] values;
+            private final PackedInts.Mutable values;
+            private final long minValue;
 
-            LongValues(long[] values) {
+            LongValues(PackedInts.Mutable values, long minValue) {
                 super(false);
                 this.values = values;
+                this.minValue = minValue;
             }
 
             @Override
             public long getValue(int docId) {
-                return values[docId];
+                return minValue + values.get(docId);
             }
         }
 
         static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {
 
-            private final long[] values;
+            private final PackedInts.Mutable values;
+            private final long minValue;
 
-            DoubleValues(long[] values) {
+            DoubleValues(PackedInts.Mutable values, long minValue) {
                 super(false);
                 this.values = values;
+                this.minValue = minValue;
             }
 
             @Override
             public double getValue(int docId) {
-                return (double) values[docId];
+                return minValue + values.get(docId);
             }
         }
     }
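Aside: to make the new storage scheme concrete, here is a minimal sketch of the delta encoding that Single and SingleSparse rely on. It is illustrative only (the class name, values, and main method are made up, not part of the commit) and assumes the Lucene 4.x PackedInts API used above:

    import org.apache.lucene.util.packed.PackedInts;

    public class PackedDeltaSketch {
        public static void main(String[] args) {
            final long[] raw = {1000003L, 1000001L, 1000007L};
            final long minValue = 1000001L;
            final long maxValue = 1000007L;
            // storing deltas against minValue needs bitsRequired(max - min) bits
            // per document (3 here) instead of 64 for the raw longs
            final int bitsRequired = PackedInts.bitsRequired(maxValue - minValue);
            final PackedInts.Mutable packed = PackedInts.getMutable(raw.length, bitsRequired, PackedInts.DEFAULT);
            for (int i = 0; i < raw.length; i++) {
                packed.set(i, raw[i] - minValue); // store the delta only
            }
            // reading adds minValue back, mirroring Single.LongValues.getValue above
            assert packed.get(0) + minValue == 1000003L;
        }
    }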
@@ -0,0 +1,203 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.fielddata.plain;
+
+import com.google.common.base.Preconditions;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
+import org.elasticsearch.ElasticSearchException;
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.fielddata.*;
+import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
+import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
+import org.elasticsearch.index.fielddata.ordinals.Ordinals;
+import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
+import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.settings.IndexSettings;
+
+import java.util.EnumSet;
+
+/**
+ * Stores numeric data into bit-packed arrays for better memory efficiency.
+ */
+public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
+
+    public static class Builder implements IndexFieldData.Builder {
+
+        private NumericType numericType;
+
+        public Builder setNumericType(NumericType numericType) {
+            this.numericType = numericType;
+            return this;
+        }
+
+        @Override
+        public IndexFieldData<AtomicNumericFieldData> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
+            return new PackedArrayIndexFieldData(index, indexSettings, fieldNames, type, cache, numericType);
+        }
+    }
+
+    private final NumericType numericType;
+
+    public PackedArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache, NumericType numericType) {
+        super(index, indexSettings, fieldNames, fieldDataType, cache);
+        Preconditions.checkNotNull(numericType);
+        Preconditions.checkArgument(EnumSet.of(NumericType.BYTE, NumericType.SHORT, NumericType.INT, NumericType.LONG).contains(numericType), getClass().getSimpleName() + " only supports integer types, not " + numericType);
+        this.numericType = numericType;
+    }
+
+    @Override
+    public NumericType getNumericType() {
+        return numericType;
+    }
+
+    @Override
+    public boolean valuesOrdered() {
+        // because we might have single values? we can dynamically update a flag to reflect that
+        // based on the atomic field data loaded
+        return false;
+    }
+
+    @Override
+    public AtomicNumericFieldData load(AtomicReaderContext context) {
+        try {
+            return cache.load(context, this);
+        } catch (Throwable e) {
+            if (e instanceof ElasticSearchException) {
+                throw (ElasticSearchException) e;
+            } else {
+                throw new ElasticSearchException(e.getMessage(), e);
+            }
+        }
+    }
+
+    @Override
+    public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
+        AtomicReader reader = context.reader();
+        Terms terms = reader.terms(getFieldNames().indexName());
+        if (terms == null) {
+            return PackedArrayAtomicFieldData.EMPTY;
+        }
+        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
+        // Lucene encodes numeric data so that the lexicographical (encoded) order matches the integer order so we know the sequence of
+        // longs is going to be monotonically increasing
+        final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();
+
+        final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
+        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
+        try {
+            BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)), reader.getLiveDocs());
+            BytesRef term;
+            assert !getNumericType().isFloatingPoint();
+            final boolean indexedAsLong = getNumericType().requiredBits() > 32;
+            while ((term = iter.next()) != null) {
+                final long value = indexedAsLong
+                        ? NumericUtils.prefixCodedToLong(term)
+                        : NumericUtils.prefixCodedToInt(term);
+                assert values.size() == 0 || value > values.get(values.size() - 1);
+                values.add(value);
+            }
+            Ordinals build = builder.build(fieldDataType.getSettings());
+
+            if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
+                Docs ordinals = build.ordinals();
+                final FixedBitSet set = builder.buildDocsWithValuesSet();
+
+                long minValue, maxValue;
+                minValue = maxValue = 0;
+                if (values.size() > 0) {
+                    minValue = values.get(0);
+                    maxValue = values.get(values.size() - 1);
+                }
+
+                // Encode document without a value with a special value
+                long missingValue = 0;
+                if (set != null) {
+                    if ((maxValue - minValue + 1) == values.size()) {
+                        // values are dense
+                        if (minValue > Long.MIN_VALUE) {
+                            missingValue = --minValue;
+                        } else {
+                            assert maxValue != Long.MAX_VALUE;
+                            missingValue = ++maxValue;
+                        }
+                    } else {
+                        for (long i = 1; i < values.size(); ++i) {
+                            if (values.get(i) > values.get(i - 1) + 1) {
+                                missingValue = values.get(i - 1) + 1;
+                                break;
+                            }
+                        }
+                    }
+                    missingValue -= minValue; // delta
+                }
+
+                final long delta = maxValue - minValue;
+                final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
+                final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
+
+                // there's a sweet spot where, due to a low unique value count, using ordinals will consume less memory
+                final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
+                final long uniqueValuesSize = values.ramBytesUsed();
+                final long ordinalsSize = build.getMemorySizeInBytes();
+
+                if (uniqueValuesSize + ordinalsSize < singleValuesSize) {
+                    return new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
+                }
+
+                final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
+                if (missingValue != 0) {
+                    sValues.fill(0, sValues.size(), missingValue);
+                }
+                for (int i = 0; i < reader.maxDoc(); i++) {
+                    final int ord = ordinals.getOrd(i);
+                    if (ord > 0) {
+                        sValues.set(i, values.get(ord - 1) - minValue);
+                    }
+                }
+                if (set == null) {
+                    return new PackedArrayAtomicFieldData.Single(sValues, minValue, reader.maxDoc());
+                } else {
+                    return new PackedArrayAtomicFieldData.SingleSparse(sValues, minValue, reader.maxDoc(), missingValue);
+                }
+            } else {
+                return new PackedArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
+            }
+        } finally {
+            builder.close();
+        }
+
+    }
+
+    @Override
+    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
+        return new LongValuesComparatorSource(this, missingValue, sortMode);
+    }
+}
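Aside: the missing-value handling in loadDirect above replaces the old FixedBitSet with a sentinel: a document without a value stores a delta that no real document uses, chosen by either extending the [minValue, maxValue] range by one or reusing the first gap in the sorted unique values. An illustrative sketch of that selection logic (a hypothetical helper, not part of the commit):

    public class MissingValueSketch {
        // Mirrors the sentinel selection in loadDirect above.
        static long chooseMissingValue(long[] sortedUniqueValues) {
            final long min = sortedUniqueValues[0];
            final long max = sortedUniqueValues[sortedUniqueValues.length - 1];
            if (max - min + 1 == sortedUniqueValues.length) {
                // dense: every value in [min, max] is used, so extend the range by one
                return min > Long.MIN_VALUE ? min - 1 : max + 1;
            }
            for (int i = 1; i < sortedUniqueValues.length; i++) {
                if (sortedUniqueValues[i] > sortedUniqueValues[i - 1] + 1) {
                    return sortedUniqueValues[i - 1] + 1; // first unused value
                }
            }
            throw new AssertionError("unreachable: a non-dense sorted sequence has a gap");
        }
    }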
@@ -60,8 +60,6 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
         }
 
         final PagedBytes bytes = new PagedBytes(15);
-        int startBytesBPV;
-        int startNumUniqueTerms;
 
         int maxDoc = reader.maxDoc();
         final int termCountHardLimit;
@@ -82,19 +80,13 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
                 // effort to load what we can (see LUCENE-2142)
                 numUniqueTerms = termCountHardLimit;
             }
 
-            startBytesBPV = PackedInts.bitsRequired(numUniqueTerms * 4);
-
-            startNumUniqueTerms = (int) numUniqueTerms;
-        } else {
-            startBytesBPV = 1;
-            startNumUniqueTerms = 1;
-        }
 
+        final MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
+        termOrdToBytesOffset.add(0); // first ord is reserved for missing values
         boolean preDefineBitsRequired = regex == null && frequency == null;
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc());
+        final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
+        OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio);
         try {
             // 0 is reserved for "unset"
             bytes.copyUsingLengthPrefix(new BytesRef());
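Aside: the PagedBytes change follows the same theme; MonotonicAppendingLongBuffer grows on demand and compresses monotonically increasing sequences, which is why the up-front startBytesBPV / startNumUniqueTerms estimation could be dropped. A small usage sketch (illustrative only, not from the commit):

    import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

    public class TermOffsetsSketch {
        public static void main(String[] args) {
            MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
            termOrdToBytesOffset.add(0); // offset reserved for the missing-value ord
            long offset = 0;
            for (int termLength : new int[] {3, 8, 5}) {
                offset += termLength;
                termOrdToBytesOffset.add(offset); // offsets only ever grow
            }
            // the buffer reports its own packed memory usage
            System.out.println("bytes used: " + termOrdToBytesOffset.ramBytesUsed());
        }
    }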
@@ -1,324 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.plain;
-
-import org.apache.lucene.util.FixedBitSet;
-import org.elasticsearch.common.RamUsage;
-import org.elasticsearch.index.fielddata.*;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals;
-
-/**
- */
-public abstract class ShortArrayAtomicFieldData extends AtomicNumericFieldData {
-
-    public static final ShortArrayAtomicFieldData EMPTY = new Empty();
-
-    protected final short[] values;
-    private final int numDocs;
-
-    protected long size = -1;
-
-    public ShortArrayAtomicFieldData(short[] values, int numDocs) {
-        super(false);
-        this.values = values;
-        this.numDocs = numDocs;
-    }
-
-    @Override
-    public void close() {
-    }
-
-    @Override
-    public int getNumDocs() {
-        return numDocs;
-    }
-
-    static class Empty extends ShortArrayAtomicFieldData {
-
-        Empty() {
-            super(null, 0);
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return LongValues.EMPTY;
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return DoubleValues.EMPTY;
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return false;
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return false;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            return 0;
-        }
-
-        @Override
-        public BytesValues getBytesValues() {
-            return BytesValues.EMPTY;
-        }
-
-        @Override
-        public ScriptDocValues getScriptValues() {
-            return ScriptDocValues.EMPTY;
-        }
-    }
-
-    public static class WithOrdinals extends ShortArrayAtomicFieldData {
-
-        private final Ordinals ordinals;
-
-        public WithOrdinals(short[] values, int numDocs, Ordinals ordinals) {
-            super(values, numDocs);
-            this.ordinals = ordinals;
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return ordinals.isMultiValued();
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return true;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            if (size == -1) {
-                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_SHORT) + ordinals.getMemorySizeInBytes();
-            }
-            return size;
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return new LongValues(values, ordinals.ordinals());
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return new DoubleValues(values, ordinals.ordinals());
-        }
-
-        static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {
-
-            private final short[] values;
-
-            LongValues(short[] values, Ordinals.Docs ordinals) {
-                super(ordinals);
-                this.values = values;
-            }
-
-            @Override
-            public long getValueByOrd(int ord) {
-                return (long) values[ord];
-            }
-        }
-
-        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {
-
-            private final short[] values;
-
-            DoubleValues(short[] values, Ordinals.Docs ordinals) {
-                super(ordinals);
-                this.values = values;
-            }
-
-            @Override
-            public double getValueByOrd(int ord) {
-                return values[ord];
-            }
-        }
-    }
-
-    /**
-     * A single valued case, where not all values are "set", so we have a FixedBitSet that
-     * indicates which values have an actual value.
-     */
-    public static class SingleFixedSet extends ShortArrayAtomicFieldData {
-
-        private final FixedBitSet set;
-
-        public SingleFixedSet(short[] values, int numDocs, FixedBitSet set) {
-            super(values, numDocs);
-            this.set = set;
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return false;
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return false;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_SHORT) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
-            }
-            return size;
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return new LongValues(values, set);
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return new DoubleValues(values, set);
-        }
-
-        static class LongValues extends org.elasticsearch.index.fielddata.LongValues {
-
-            private final short[] values;
-            private final FixedBitSet set;
-
-            LongValues(short[] values, FixedBitSet set) {
-                super(false);
-                this.values = values;
-                this.set = set;
-            }
-
-            @Override
-            public boolean hasValue(int docId) {
-                return set.get(docId);
-            }
-
-            @Override
-            public long getValue(int docId) {
-                return (long) values[docId];
-            }
-        }
-
-        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {
-
-            private final short[] values;
-            private final FixedBitSet set;
-
-            DoubleValues(short[] values, FixedBitSet set) {
-                super(false);
-                this.values = values;
-                this.set = set;
-            }
-
-            @Override
-            public boolean hasValue(int docId) {
-                return set.get(docId);
-            }
-
-            @Override
-            public double getValue(int docId) {
-                return (double) values[docId];
-            }
-        }
-    }
-
-    /**
-     * Assumes all the values are "set", and docId is used as the index to the value array.
-     */
-    public static class Single extends ShortArrayAtomicFieldData {
-
-        /**
-         * Note, here, we assume that there is no offset by 1 from docId, so position 0
-         * is the value for docId 0.
-         */
-        public Single(short[] values, int numDocs) {
-            super(values, numDocs);
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return false;
-        }
-
-        @Override
-        public boolean isValuesOrdered() {
-            return false;
-        }
-
-        @Override
-        public long getMemorySizeInBytes() {
-            if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_SHORT);
-            }
-            return size;
-        }
-
-        @Override
-        public LongValues getLongValues() {
-            return new LongValues(values);
-        }
-
-        @Override
-        public DoubleValues getDoubleValues() {
-            return new DoubleValues(values);
-        }
-
-        static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {
-
-            private final short[] values;
-
-            LongValues(short[] values) {
-                super(false);
-                this.values = values;
-            }
-
-            @Override
-            public long getValue(int docId) {
-                return (long) values[docId];
-            }
-        }
-
-        static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {
-
-            private final short[] values;
-
-            DoubleValues(short[] values) {
-                super(false);
-                this.values = values;
-            }
-
-            @Override
-            public double getValue(int docId) {
-                return (double) values[docId];
-            }
-        }
-    }
-}
@@ -1,199 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.plain;
-
-import gnu.trove.iterator.TShortIterator;
-import gnu.trove.list.array.TShortArrayList;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefIterator;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.NumericUtils;
-import org.elasticsearch.ElasticSearchException;
-import org.elasticsearch.common.Nullable;
-import org.elasticsearch.common.RamUsage;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.index.Index;
-import org.elasticsearch.index.fielddata.*;
-import org.elasticsearch.index.fielddata.fieldcomparator.ShortValuesComparatorSource;
-import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
-import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
-import org.elasticsearch.index.mapper.FieldMapper;
-import org.elasticsearch.index.settings.IndexSettings;
-
-/**
- */
-public class ShortArrayIndexFieldData extends AbstractIndexFieldData<AtomicNumericFieldData> implements IndexNumericFieldData<AtomicNumericFieldData> {
-
-    public static class Builder implements IndexFieldData.Builder {
-
-        @Override
-        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
-            return new ShortArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
-        }
-    }
-
-    public ShortArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
-        super(index, indexSettings, fieldNames, fieldDataType, cache);
-    }
-
-    @Override
-    public NumericType getNumericType() {
-        return NumericType.SHORT;
-    }
-
-    @Override
-    public boolean valuesOrdered() {
-        // because we might have single values? we can dynamically update a flag to reflect that
-        // based on the atomic field data loaded
-        return false;
-    }
-
-    @Override
-    public AtomicNumericFieldData load(AtomicReaderContext context) {
-        try {
-            return cache.load(context, this);
-        } catch (Throwable e) {
-            if (e instanceof ElasticSearchException) {
-                throw (ElasticSearchException) e;
-            } else {
-                throw new ElasticSearchException(e.getMessage(), e);
-            }
-        }
-    }
-
-    @Override
-    public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exception {
-        AtomicReader reader = context.reader();
-        Terms terms = reader.terms(getFieldNames().indexName());
-        if (terms == null) {
-            return ShortArrayAtomicFieldData.EMPTY;
-        }
-        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
-        final TShortArrayList values = new TShortArrayList();
-
-        values.add((short) 0); // first "t" indicates null value
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc());
-        try {
-            BytesRef term;
-            short max = Short.MIN_VALUE;
-            short min = Short.MAX_VALUE;
-            BytesRefIterator iter = builder.buildFromTerms(builder.wrapNumeric32Bit(terms.iterator(null)), reader.getLiveDocs());
-            while ((term = iter.next()) != null) {
-                short value = (short) NumericUtils.prefixCodedToInt(term);
-                values.add(value);
-                if (value > max) {
-                    max = value;
-                }
-                if (value < min) {
-                    min = value;
-                }
-            }
-
-            Ordinals build = builder.build(fieldDataType.getSettings());
-            if (fieldDataType.getSettings().getAsBoolean("optimize_type", true)) {
-                // if we can fit all our values in a byte we should do this!
-                if (min >= Byte.MIN_VALUE && max <= Byte.MAX_VALUE) {
-                    return ByteArrayIndexFieldData.build(reader, fieldDataType, builder, build, new ByteArrayIndexFieldData.BuilderBytes() {
-                        @Override
-                        public byte get(int index) {
-                            return (byte) values.get(index);
-                        }
-
-                        @Override
-                        public byte[] toArray() {
-                            byte[] bValues = new byte[values.size()];
-                            int i = 0;
-                            for (TShortIterator it = values.iterator(); it.hasNext(); ) {
-                                bValues[i++] = (byte) it.next();
-                            }
-                            return bValues;
-                        }
-                    });
-                }
-            }
-
-            return build(reader, fieldDataType, builder, build, new BuilderShorts() {
-                @Override
-                public short get(int index) {
-                    return values.get(index);
-                }
-
-                @Override
-                public short[] toArray() {
-                    return values.toArray();
-                }
-
-                @Override
-                public int size() {
-                    return values.size();
-                }
-            });
-        } finally {
-            builder.close();
-        }
-    }
-
-    static interface BuilderShorts {
-        short get(int index);
-
-        short[] toArray();
-
-        int size();
-    }
-
-    static ShortArrayAtomicFieldData build(AtomicReader reader, FieldDataType fieldDataType, OrdinalsBuilder builder, Ordinals build, BuilderShorts values) {
-        if (!build.isMultiValued() && CommonSettings.removeOrdsOnSingleValue(fieldDataType)) {
-            Docs ordinals = build.ordinals();
-            final FixedBitSet set = builder.buildDocsWithValuesSet();
-
-            // there's sweatspot where due to low unique value count, using ordinals will consume less memory
-            long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_SHORT + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
-            long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_SHORT;
-            long ordinalsSize = build.getMemorySizeInBytes();
-            if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
-                return new ShortArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
-            }
-
-            short[] sValues = new short[reader.maxDoc()];
-            int maxDoc = reader.maxDoc();
-            for (int i = 0; i < maxDoc; i++) {
-                sValues[i] = values.get(ordinals.getOrd(i));
-            }
-
-            if (set == null) {
-                return new ShortArrayAtomicFieldData.Single(sValues, reader.maxDoc());
-            } else {
-                return new ShortArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
-            }
-        } else {
-            return new ShortArrayAtomicFieldData.WithOrdinals(values.toArray(), reader.maxDoc(), build);
-        }
-    }
-
-    @Override
-    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
-        return new ShortValuesComparatorSource(this, missingValue, sortMode);
-    }
-}
@@ -0,0 +1,147 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.benchmark.fielddata;
+
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
+import org.elasticsearch.index.fielddata.FieldDataType;
+import org.elasticsearch.index.fielddata.IndexFieldDataService;
+import org.elasticsearch.index.fielddata.IndexNumericFieldData;
+import org.elasticsearch.index.mapper.FieldMapper;
+
+import java.util.Random;
+
+public class LongFieldDataBenchmark {
+
+    private static final Random RANDOM = new Random();
+    private static final int SECONDS_PER_YEAR = 60 * 60 * 24 * 365;
+
+    public static enum Data {
+        SINGLE_VALUES_DENSE_ENUM {
+            public int numValues() {
+                return 1;
+            }
+            @Override
+            public long nextValue() {
+                return RANDOM.nextInt(16);
+            }
+        },
+        SINGLE_VALUED_DENSE_DATE {
+            public int numValues() {
+                return 1;
+            }
+            @Override
+            public long nextValue() {
+                // somewhere in-between 2010 and 2012
+                return 1000L * (40L * SECONDS_PER_YEAR + RANDOM.nextInt(2 * SECONDS_PER_YEAR));
+            }
+        },
+        MULTI_VALUED_DATE {
+            public int numValues() {
+                return RANDOM.nextInt(3);
+            }
+            @Override
+            public long nextValue() {
+                // somewhere in-between 2010 and 2012
+                return 1000L * (40L * SECONDS_PER_YEAR + RANDOM.nextInt(2 * SECONDS_PER_YEAR));
+            }
+        },
+        MULTI_VALUED_ENUM {
+            public int numValues() {
+                return RANDOM.nextInt(3);
+            }
+            @Override
+            public long nextValue() {
+                return 3 + RANDOM.nextInt(8);
+            }
+        },
+        SINGLE_VALUED_SPARSE_RANDOM {
+            public int numValues() {
+                return RANDOM.nextFloat() < 0.1f ? 1 : 0;
+            }
+            @Override
+            public long nextValue() {
+                return RANDOM.nextLong();
+            }
+        },
+        MULTI_VALUED_SPARSE_RANDOM {
+            public int numValues() {
+                return RANDOM.nextFloat() < 0.1f ? 1 + RANDOM.nextInt(5) : 0;
+            }
+            @Override
+            public long nextValue() {
+                return RANDOM.nextLong();
+            }
+        },
+        MULTI_VALUED_DENSE_RANDOM {
+            public int numValues() {
+                return 1 + RANDOM.nextInt(3);
+            }
+            @Override
+            public long nextValue() {
+                return RANDOM.nextLong();
+            }
+        };
+        public abstract int numValues();
+        public abstract long nextValue();
+    }
+
+    public static void main(String[] args) throws Exception {
+        final IndexWriterConfig iwc = new IndexWriterConfig(Lucene.VERSION, new KeywordAnalyzer());
+        final String fieldName = "f";
+        final int numDocs = 1000000;
+        System.out.println("Data\tLoading time\tImplementation\tActual size\tExpected size");
+        for (Data data : Data.values()) {
+            final RAMDirectory dir = new RAMDirectory();
+            final IndexWriter indexWriter = new IndexWriter(dir, iwc);
+            for (int i = 0; i < numDocs; ++i) {
+                final Document doc = new Document();
+                final int numFields = data.numValues();
+                for (int j = 0; j < numFields; ++j) {
+                    doc.add(new LongField(fieldName, data.nextValue(), Store.NO));
+                }
+                indexWriter.addDocument(doc);
+            }
+            indexWriter.forceMerge(1);
+            indexWriter.close();
+
+            final DirectoryReader dr = DirectoryReader.open(dir);
+            final IndexFieldDataService fds = new IndexFieldDataService(new Index("dummy"));
+            final IndexNumericFieldData<AtomicNumericFieldData> fd = fds.getForField(new FieldMapper.Names(fieldName), new FieldDataType("long"));
+            final long start = System.nanoTime();
+            final AtomicNumericFieldData afd = fd.loadDirect(SlowCompositeReaderWrapper.wrap(dr).getContext());
+            final long loadingTimeMs = (System.nanoTime() - start) / 1000 / 1000;
+            System.out.println(data + "\t" + loadingTimeMs + "\t" + afd.getClass().getSimpleName() + "\t" + RamUsageEstimator.humanSizeOf(afd.getLongValues()) + "\t" + RamUsageEstimator.humanReadableUnits(afd.getMemorySizeInBytes()));
+            dr.close();
+        }
+    }
+
+}
@@ -34,7 +34,6 @@ import org.elasticsearch.search.facet.FacetBuilder;
 
 import java.util.Date;
 
-import static org.elasticsearch.client.Requests.createIndexRequest;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
 import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
@@ -63,7 +62,7 @@ public class HistogramFacetSearchBenchmark {
 
         Client client = node1.client();
 
-        long COUNT = SizeValue.parseSizeValue("5m").singles();
+        long COUNT = SizeValue.parseSizeValue("20m").singles();
         int BATCH = 500;
         int QUERY_WARMUP = 20;
         int QUERY_COUNT = 200;
@@ -76,9 +75,32 @@
 
         Thread.sleep(10000);
         try {
-            client.admin().indices().create(createIndexRequest("test").settings(
-                    settingsBuilder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0)))
-                    .actionGet();
+            client.admin().indices().prepareCreate("test")
+                    .setSettings(settingsBuilder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0))
+                    .addMapping("test", jsonBuilder()
+                            .startObject()
+                                .startObject("test")
+                                    .startObject("properties")
+                                        .startObject("name")
+                                            .startObject("fields")
+                                                .startObject("l_value")
+                                                    .field("type", "long")
+                                                .endObject()
+                                                .startObject("i_value")
+                                                    .field("type", "integer")
+                                                .endObject()
+                                                .startObject("s_value")
+                                                    .field("type", "short")
+                                                .endObject()
+                                                .startObject("b_value")
+                                                    .field("type", "byte")
+                                                .endObject()
+                                            .endObject()
+                                        .endObject()
+                                    .endObject()
+                                .endObject()
+                            .endObject())
+                    .execute().actionGet();
 
             StopWatch stopWatch = new StopWatch().start();
 
@@ -90,9 +112,13 @@
                 BulkRequestBuilder request = client.prepareBulk();
                 for (int j = 0; j < BATCH; j++) {
                     counter++;
+                    final long value = lValues[counter % lValues.length];
                     XContentBuilder source = jsonBuilder().startObject()
                             .field("id", Integer.valueOf(counter))
-                            .field("l_value", lValues[counter % lValues.length])
+                            .field("l_value", value)
+                            .field("i_value", (int) value)
+                            .field("s_value", (short) value)
+                            .field("b_value", (byte) value)
                             .field("date", new Date())
                             .endObject();
                     request.add(Requests.indexRequest("test").type("type1").id(Integer.toString(counter))
@@ -125,6 +151,9 @@
             SearchResponse searchResponse = client.prepareSearch()
                     .setQuery(matchAllQuery())
                     .addFacet(histogramFacet("l_value").field("l_value").interval(4))
+                    .addFacet(histogramFacet("i_value").field("i_value").interval(4))
+                    .addFacet(histogramFacet("s_value").field("s_value").interval(4))
+                    .addFacet(histogramFacet("b_value").field("b_value").interval(4))
                    .addFacet(histogramFacet("date").field("date").interval(1000))
                    .execute().actionGet();
            if (j == 0) {
@@ -137,30 +166,33 @@
         System.out.println("--> Warmup DONE");
 
         long totalQueryTime = 0;
-        for (int j = 0; j < QUERY_COUNT; j++) {
-            SearchResponse searchResponse = client.prepareSearch()
-                    .setQuery(matchAllQuery())
-                    .addFacet(histogramFacet("l_value").field("l_value").interval(4))
-                    .execute().actionGet();
-            if (searchResponse.getHits().totalHits() != COUNT) {
-                System.err.println("--> mismatch on hits");
+        for (String field : new String[] {"b_value", "s_value", "i_value", "l_value"}) {
+            totalQueryTime = 0;
+            for (int j = 0; j < QUERY_COUNT; j++) {
+                SearchResponse searchResponse = client.prepareSearch()
+                        .setQuery(matchAllQuery())
+                        .addFacet(histogramFacet(field).field(field).interval(4))
+                        .execute().actionGet();
+                if (searchResponse.getHits().totalHits() != COUNT) {
+                    System.err.println("--> mismatch on hits");
+                }
+                totalQueryTime += searchResponse.getTookInMillis();
             }
-            totalQueryTime += searchResponse.getTookInMillis();
-        }
-        System.out.println("--> Histogram Facet (l_value) " + (totalQueryTime / QUERY_COUNT) + "ms");
+            System.out.println("--> Histogram Facet (" + field + ") " + (totalQueryTime / QUERY_COUNT) + "ms");
 
-        totalQueryTime = 0;
-        for (int j = 0; j < QUERY_COUNT; j++) {
-            SearchResponse searchResponse = client.prepareSearch()
-                    .setQuery(matchAllQuery())
-                    .addFacet(histogramFacet("l_value").field("l_value").valueField("l_value").interval(4))
-                    .execute().actionGet();
-            if (searchResponse.getHits().totalHits() != COUNT) {
-                System.err.println("--> mismatch on hits");
+            totalQueryTime = 0;
+            for (int j = 0; j < QUERY_COUNT; j++) {
+                SearchResponse searchResponse = client.prepareSearch()
+                        .setQuery(matchAllQuery())
+                        .addFacet(histogramFacet("l_value").field("l_value").valueField("l_value").interval(4))
+                        .execute().actionGet();
+                if (searchResponse.getHits().totalHits() != COUNT) {
+                    System.err.println("--> mismatch on hits");
+                }
+                totalQueryTime += searchResponse.getTookInMillis();
             }
-            totalQueryTime += searchResponse.getTookInMillis();
+            System.out.println("--> Histogram Facet (" + field + "/" + field + ") " + (totalQueryTime / QUERY_COUNT) + "ms");
         }
-        System.out.println("--> Histogram Facet (l_value/l_value) " + (totalQueryTime / QUERY_COUNT) + "ms");
 
         totalQueryTime = 0;
         for (int j = 0; j < QUERY_COUNT; j++) {
@@ -20,12 +20,7 @@
 package org.elasticsearch.test.unit.index.fielddata;
 
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.index.*;
 import org.apache.lucene.store.RAMDirectory;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.Index;
@@ -55,7 +50,8 @@ public abstract class AbstractFieldDataTests {
     @BeforeMethod
     public void setup() throws Exception {
         ifdService = new IndexFieldDataService(new Index("test"));
-        writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(Lucene.VERSION, new StandardAnalyzer(Lucene.VERSION)));
+        // LogByteSizeMP to preserve doc ID order
+        writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(Lucene.VERSION, new StandardAnalyzer(Lucene.VERSION)).setMergePolicy(new LogByteSizeMergePolicy()));
     }
 
     protected AtomicReaderContext refreshReader() throws Exception {
@@ -1,34 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.test.unit.index.fielddata;
-
-import org.elasticsearch.index.fielddata.FieldDataType;
-
-/**
- */
-public class ByteFieldDataTests extends IntFieldDataTests {
-
-    @Override
-    protected FieldDataType getFieldDataType() {
-        return new FieldDataType("byte");
-    }
-
-}
@ -1,227 +0,0 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.test.unit.index.fielddata;
|
||||
|
||||
import org.apache.lucene.document.*;
|
||||
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
|
||||
import org.elasticsearch.index.fielddata.FieldDataType;
|
||||
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
|
||||
import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
|
||||
import org.elasticsearch.index.fielddata.plain.IntArrayAtomicFieldData;
|
||||
import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class IntFieldDataTests extends NumericFieldDataTests {
|
||||
|
||||
@Override
|
||||
protected FieldDataType getFieldDataType() {
|
||||
return new FieldDataType("int");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOptimizeTypeByte() throws Exception {
|
||||
Document d = new Document();
|
||||
d.add(new StringField("_id", "1", Field.Store.NO));
|
||||
d.add(new LongField("value", Byte.MAX_VALUE, Field.Store.NO));
|
||||
writer.addDocument(d);
|
||||
|
||||
d = new Document();
|
||||
d.add(new StringField("_id", "2", Field.Store.NO));
|
||||
d.add(new LongField("value", Byte.MIN_VALUE, Field.Store.NO));
|
||||
writer.addDocument(d);
|
||||
|
||||
IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
|
||||
AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
|
||||
assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class));
|
||||
assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE));
|
||||
assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOptimizeTypeShort() throws Exception {
|
||||
Document d = new Document();
|
||||
d.add(new StringField("_id", "1", Field.Store.NO));
|
||||
d.add(new LongField("value", Short.MAX_VALUE, Field.Store.NO));
|
||||
writer.addDocument(d);
|
||||
|
||||
d = new Document();
|
||||
d.add(new StringField("_id", "2", Field.Store.NO));
|
||||
d.add(new LongField("value", Short.MIN_VALUE, Field.Store.NO));
|
||||
writer.addDocument(d);
|
||||
|
||||
IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
|
||||
AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
|
||||
assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class));
|
||||
assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE));
|
||||
assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOptimizeTypeInteger() throws Exception {
|
||||
Document d = new Document();
|
||||
d.add(new StringField("_id", "1", Field.Store.NO));
|
||||
d.add(new LongField("value", Integer.MAX_VALUE, Field.Store.NO));
|
||||
writer.addDocument(d);
|
||||
|
||||
d = new Document();
|
||||
d.add(new StringField("_id", "2", Field.Store.NO));
|
||||
d.add(new LongField("value", Integer.MIN_VALUE, Field.Store.NO));
|
||||
writer.addDocument(d);
|
||||
|
||||
IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
|
||||
AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
|
||||
        assertThat(fieldData, instanceOf(IntArrayAtomicFieldData.class));
        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE));
        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE));
    }

    @Override
    protected void fillSingleValueAllSet() throws Exception {
        Document d = new Document();
        d.add(new StringField("_id", "1", Field.Store.NO));
        d.add(new IntField("value", 2, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "2", Field.Store.NO));
        d.add(new IntField("value", 1, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "3", Field.Store.NO));
        d.add(new IntField("value", 3, Field.Store.NO));
        writer.addDocument(d);
    }

    @Override
    protected void fillSingleValueWithMissing() throws Exception {
        Document d = new Document();
        d.add(new StringField("_id", "1", Field.Store.NO));
        d.add(new IntField("value", 2, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "2", Field.Store.NO));
        //d.add(new StringField("value", one(), Field.Store.NO)); // MISSING....
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "3", Field.Store.NO));
        d.add(new IntField("value", 3, Field.Store.NO));
        writer.addDocument(d);
    }

    @Override
    protected void fillMultiValueAllSet() throws Exception {
        Document d = new Document();
        d.add(new StringField("_id", "1", Field.Store.NO));
        d.add(new IntField("value", 2, Field.Store.NO));
        d.add(new IntField("value", 4, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "2", Field.Store.NO));
        d.add(new IntField("value", 1, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "3", Field.Store.NO));
        d.add(new IntField("value", 3, Field.Store.NO));
        writer.addDocument(d);
    }

    @Override
    protected void fillMultiValueWithMissing() throws Exception {
        Document d = new Document();
        d.add(new StringField("_id", "1", Field.Store.NO));
        d.add(new IntField("value", 2, Field.Store.NO));
        d.add(new IntField("value", 4, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "2", Field.Store.NO));
        //d.add(new StringField("value", one(), Field.Store.NO)); // MISSING
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "3", Field.Store.NO));
        d.add(new IntField("value", 3, Field.Store.NO));
        writer.addDocument(d);
    }

    protected void fillExtendedMvSet() throws Exception {
        Document d = new Document();
        d.add(new StringField("_id", "1", Field.Store.NO));
        d.add(new IntField("value", 2, Field.Store.NO));
        d.add(new IntField("value", 4, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "2", Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "3", Field.Store.NO));
        d.add(new IntField("value", 3, Field.Store.NO));
        writer.addDocument(d);
        writer.commit();

        d = new Document();
        d.add(new StringField("_id", "4", Field.Store.NO));
        d.add(new IntField("value", 4, Field.Store.NO));
        d.add(new IntField("value", 5, Field.Store.NO));
        d.add(new IntField("value", 6, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "5", Field.Store.NO));
        d.add(new IntField("value", 6, Field.Store.NO));
        d.add(new IntField("value", 7, Field.Store.NO));
        d.add(new IntField("value", 8, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "6", Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "7", Field.Store.NO));
        d.add(new IntField("value", 8, Field.Store.NO));
        d.add(new IntField("value", 9, Field.Store.NO));
        d.add(new IntField("value", 10, Field.Store.NO));
        writer.addDocument(d);
        writer.commit();

        d = new Document();
        d.add(new StringField("_id", "8", Field.Store.NO));
        d.add(new IntField("value", -8, Field.Store.NO));
        d.add(new IntField("value", -9, Field.Store.NO));
        d.add(new IntField("value", -10, Field.Store.NO));
        writer.addDocument(d);
    }

}
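The interleaved writer.commit() calls in fillExtendedMvSet above are deliberate: they split the corpus across several Lucene segments, and field data is built and cached once per atomic reader rather than once per index. A minimal sketch of what that implies, assuming Lucene 4.x reader APIs; the class and method names here are invented for illustration and are not part of the patch:

    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
    import org.elasticsearch.index.fielddata.IndexNumericFieldData;

    class PerSegmentLoadSketch {
        // Each commit starts a new segment, so every leaf reader gets its own
        // independently built (and independently sized) field data structure.
        static void loadPerLeaf(IndexWriter writer, IndexNumericFieldData fieldData) throws Exception {
            DirectoryReader reader = DirectoryReader.open(writer, true);
            for (AtomicReaderContext leaf : reader.leaves()) {
                AtomicNumericFieldData leafData = fieldData.load(leaf);
                // leafData only covers the documents of this segment
            }
        }
    }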
@@ -19,92 +19,39 @@
package org.elasticsearch.test.unit.index.fielddata;

import gnu.trove.iterator.TLongIterator;
import gnu.trove.set.TDoubleSet;
import gnu.trove.set.TLongSet;
import gnu.trove.set.hash.TDoubleHashSet;
import gnu.trove.set.hash.TLongHashSet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.IntArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.LongArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.plain.PackedArrayAtomicFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.joda.time.DateTimeZone;
import org.testng.annotations.Test;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;

/**
 * Tests for all integer types (byte, short, int, long).
 */
public class LongFieldDataTests extends NumericFieldDataTests {

    @Override
    protected FieldDataType getFieldDataType() {
        // we don't want to optimize the type so it will always be a long...
        return new FieldDataType("long", ImmutableSettings.builder().put("optimize_type", false));
    }

    @Test
    public void testOptimizeTypeByte() throws Exception {
        Document d = new Document();
        d.add(new StringField("_id", "1", Field.Store.NO));
        d.add(new LongField("value", Byte.MAX_VALUE, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "2", Field.Store.NO));
        d.add(new LongField("value", Byte.MIN_VALUE, Field.Store.NO));
        writer.addDocument(d);

        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
        assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class));
        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE));
        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE));
    }

    @Test
    public void testOptimizeTypeShort() throws Exception {
        Document d = new Document();
        d.add(new StringField("_id", "1", Field.Store.NO));
        d.add(new LongField("value", Short.MAX_VALUE, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "2", Field.Store.NO));
        d.add(new LongField("value", Short.MIN_VALUE, Field.Store.NO));
        writer.addDocument(d);

        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
        assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class));
        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE));
        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE));
    }

    @Test
    public void testOptimizeTypeInteger() throws Exception {
        Document d = new Document();
        d.add(new StringField("_id", "1", Field.Store.NO));
        d.add(new LongField("value", Integer.MAX_VALUE, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "2", Field.Store.NO));
        d.add(new LongField("value", Integer.MIN_VALUE, Field.Store.NO));
        writer.addDocument(d);

        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
        assertThat(fieldData, instanceOf(IntArrayAtomicFieldData.class));
        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE));
        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE));
        return new FieldDataType("long", ImmutableSettings.builder());
    }

    @Test
@@ -121,7 +68,7 @@ public class LongFieldDataTests extends NumericFieldDataTests {

        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
        assertThat(fieldData, instanceOf(LongArrayAtomicFieldData.class));
        assertThat(fieldData, instanceOf(PackedArrayAtomicFieldData.class));
        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Integer.MAX_VALUE + 1l));
        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Integer.MIN_VALUE - 1l));
    }
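This hunk is the heart of the change: the same documents that previously loaded into LongArrayAtomicFieldData now load into PackedArrayAtomicFieldData. As a rough illustration of what "packed" means here, the sketch below uses Lucene's PackedInts API directly; the class is invented for illustration, and the real implementation is more involved (it also has to accommodate negative values, e.g. by offsetting from the minimum value):

    import org.apache.lucene.util.packed.PackedInts;

    class PackedValuesSketch {
        // Store an array of non-negative longs using only as many bits per
        // value as the largest value requires, instead of a fixed 64 bits.
        static PackedInts.Mutable pack(long[] values) {
            long max = 0;
            for (long v : values) {
                max = Math.max(max, v);
            }
            int bitsPerValue = Math.max(1, PackedInts.bitsRequired(max));
            PackedInts.Mutable packed =
                    PackedInts.getMutable(values.length, bitsPerValue, PackedInts.DEFAULT);
            for (int i = 0; i < values.length; ++i) {
                packed.set(i, values[i]);
            }
            return packed; // packed.get(i) == values[i], at a fraction of the memory
        }
    }

With this scheme a byte-sized field costs at most 8 bits per value and a long-sized one at most 64, which is how the four per-type implementations can collapse into a single class.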
@@ -264,4 +211,171 @@ public class LongFieldDataTests extends NumericFieldDataTests {
        writer.addDocument(d);
    }

    private static final int SECONDS_PER_YEAR = 60 * 60 * 24 * 365;

    // TODO: use random() when migrating to Junit
    public static enum Data {
        SINGLE_VALUED_DENSE_ENUM {
            public int numValues(Random r) {
                return 1;
            }
            @Override
            public long nextValue(Random r) {
                return 1 + r.nextInt(16);
            }
        },
        SINGLE_VALUED_DENSE_DATE {
            public int numValues(Random r) {
                return 1;
            }
            @Override
            public long nextValue(Random r) {
                // somewhere in-between 2010 and 2012
                return 1000L * (40L * SECONDS_PER_YEAR + r.nextInt(2 * SECONDS_PER_YEAR));
            }
        },
        MULTI_VALUED_DATE {
            public int numValues(Random r) {
                return r.nextInt(3);
            }
            @Override
            public long nextValue(Random r) {
                // somewhere in-between 2010 and 2012
                return 1000L * (40L * SECONDS_PER_YEAR + r.nextInt(2 * SECONDS_PER_YEAR));
            }
        },
        MULTI_VALUED_ENUM {
            public int numValues(Random r) {
                return r.nextInt(3);
            }
            @Override
            public long nextValue(Random r) {
                return 3 + r.nextInt(8);
            }
        },
        SINGLE_VALUED_SPARSE_RANDOM {
            public int numValues(Random r) {
                return r.nextFloat() < 0.1f ? 1 : 0;
            }
            @Override
            public long nextValue(Random r) {
                return r.nextLong();
            }
        },
        MULTI_VALUED_SPARSE_RANDOM {
            public int numValues(Random r) {
                return r.nextFloat() < 0.1f ? 1 + r.nextInt(5) : 0;
            }
            @Override
            public long nextValue(Random r) {
                return r.nextLong();
            }
        },
        MULTI_VALUED_DENSE_RANDOM {
            public int numValues(Random r) {
                return 1 + r.nextInt(3);
            }
            @Override
            public long nextValue(Random r) {
                return r.nextLong();
            }
        };
        public abstract int numValues(Random r);
        public abstract long nextValue(Random r);
    }
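The SECONDS_PER_YEAR constant ignores leap years, so the date profiles above deserve a quick sanity check: 40 such years past the epoch lands in late December 2009, and the random offset spans two further years, which matches the "somewhere in-between 2010 and 2012" comments. A throwaway check, not part of the patch:

    public class DateProfileCheck {
        private static final int SECONDS_PER_YEAR = 60 * 60 * 24 * 365;

        public static void main(String[] args) {
            // 40 * 365 days = 1,261,440,000 seconds; ten skipped leap days
            // pull the date back from 2010-01-01 to ~2009-12-22.
            System.out.println(new java.util.Date(1000L * (40L * SECONDS_PER_YEAR)));
            // upper bound of the range, ~2011-12-22
            System.out.println(new java.util.Date(1000L * (42L * SECONDS_PER_YEAR)));
        }
    }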
    private void test(List<TLongSet> values) throws Exception {
        StringField id = new StringField("_id", "", Field.Store.NO);

        for (int i = 0; i < values.size(); ++i) {
            Document doc = new Document();
            id.setStringValue("" + i);
            doc.add(id);
            final TLongSet v = values.get(i);
            for (TLongIterator it = v.iterator(); it.hasNext(); ) {
                LongField value = new LongField("value", it.next(), Field.Store.NO);
                doc.add(value);
            }
            writer.addDocument(doc);
        }
        writer.forceMerge(1);

        final IndexNumericFieldData indexFieldData = getForField("value");
        final AtomicNumericFieldData atomicFieldData = indexFieldData.load(refreshReader());
        final LongValues data = atomicFieldData.getLongValues();
        final DoubleValues doubleData = atomicFieldData.getDoubleValues();
        final TLongSet set = new TLongHashSet();
        final TDoubleSet doubleSet = new TDoubleHashSet();
        for (int i = 0; i < values.size(); ++i) {
            final TLongSet v = values.get(i);

            assertThat(data.hasValue(i), equalTo(!v.isEmpty()));
            assertThat(doubleData.hasValue(i), equalTo(!v.isEmpty()));

            if (v.isEmpty()) {
                assertThat(data.getValue(i), equalTo(0L));
                assertThat(doubleData.getValue(i), equalTo(0d));
            }

            set.clear();
            for (LongValues.Iter iter = data.getIter(i); iter.hasNext(); ) {
                set.add(iter.next());
            }
            assertThat(set, equalTo(v));

            final TDoubleSet doubleV = new TDoubleHashSet();
            for (TLongIterator it = v.iterator(); it.hasNext(); ) {
                doubleV.add((double) it.next());
            }
            doubleSet.clear();
            for (DoubleValues.Iter iter = doubleData.getIter(i); iter.hasNext(); ) {
                doubleSet.add(iter.next());
            }
            assertThat(doubleSet, equalTo(doubleV));
        }
    }

    private void test(Data data) throws Exception {
        Random r = new Random(data.ordinal());
        final int numDocs = 1000 + r.nextInt(19000);
        final List<TLongSet> values = new ArrayList<TLongSet>(numDocs);
        for (int i = 0; i < numDocs; ++i) {
            final int numValues = data.numValues(r);
            final TLongSet vals = new TLongHashSet(numValues);
            for (int j = 0; j < numValues; ++j) {
                vals.add(data.nextValue(r));
            }
            values.add(vals);
        }
        test(values);
    }

    public void testSingleValuedDenseEnum() throws Exception {
        test(Data.SINGLE_VALUED_DENSE_ENUM);
    }

    public void testSingleValuedDenseDate() throws Exception {
        test(Data.SINGLE_VALUED_DENSE_DATE);
    }

    public void testSingleValuedSparseRandom() throws Exception {
        test(Data.SINGLE_VALUED_SPARSE_RANDOM);
    }

    public void testMultiValuedDate() throws Exception {
        test(Data.MULTI_VALUED_DATE);
    }

    public void testMultiValuedEnum() throws Exception {
        test(Data.MULTI_VALUED_ENUM);
    }

    public void testMultiValuedSparseRandom() throws Exception {
        test(Data.MULTI_VALUED_SPARSE_RANDOM);
    }

    public void testMultiValuedDenseRandom() throws Exception {
        test(Data.MULTI_VALUED_DENSE_RANDOM);
    }

}
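One property of this harness worth noting: test(Data) seeds its Random with data.ordinal(), so each profile generates the same corpus on every run; failures are reproducible, but the data never varies. The "TODO: use random() when migrating to Junit" comment above points at the randomized-testing style where the seed comes from the runner instead. A hypothetical variant (the property name is made up, not part of the patch):

    // Hypothetical: let an externally supplied seed drive the corpus, so a
    // failing run can be replayed by pinning -Dtests.seed=<value>.
    long seed = Long.getLong("tests.seed", System.nanoTime());
    Random r = new Random(seed ^ data.ordinal());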
@@ -1,85 +0,0 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.test.unit.index.fielddata;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.elasticsearch.index.fielddata.AtomicNumericFieldData;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.plain.ByteArrayAtomicFieldData;
import org.elasticsearch.index.fielddata.plain.ShortArrayAtomicFieldData;
import org.elasticsearch.index.mapper.FieldMapper;
import org.testng.annotations.Test;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;

/**
 */
public class ShortFieldDataTests extends IntFieldDataTests {

    @Override
    protected FieldDataType getFieldDataType() {
        return new FieldDataType("short");
    }

    @Test
    public void testOptimizeTypeByte() throws Exception {
        Document d = new Document();
        d.add(new StringField("_id", "1", Field.Store.NO));
        d.add(new LongField("value", Byte.MAX_VALUE, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "2", Field.Store.NO));
        d.add(new LongField("value", Byte.MIN_VALUE, Field.Store.NO));
        writer.addDocument(d);

        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
        assertThat(fieldData, instanceOf(ByteArrayAtomicFieldData.class));
        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Byte.MAX_VALUE));
        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Byte.MIN_VALUE));
    }

    @Test
    public void testOptimizeTypeShort() throws Exception {
        Document d = new Document();
        d.add(new StringField("_id", "1", Field.Store.NO));
        d.add(new LongField("value", Short.MAX_VALUE, Field.Store.NO));
        writer.addDocument(d);

        d = new Document();
        d.add(new StringField("_id", "2", Field.Store.NO));
        d.add(new LongField("value", Short.MIN_VALUE, Field.Store.NO));
        writer.addDocument(d);

        IndexNumericFieldData indexFieldData = ifdService.getForField(new FieldMapper.Names("value"), new FieldDataType("long"));
        AtomicNumericFieldData fieldData = indexFieldData.load(refreshReader());
        assertThat(fieldData, instanceOf(ShortArrayAtomicFieldData.class));
        assertThat(fieldData.getLongValues().getValue(0), equalTo((long) Short.MAX_VALUE));
        assertThat(fieldData.getLongValues().getValue(1), equalTo((long) Short.MIN_VALUE));
    }

}
@@ -24,9 +24,9 @@ import org.apache.lucene.document.IntField;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.fieldcomparator.ByteValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.plain.ByteArrayIndexFieldData;

/**
 */

@@ -39,7 +39,7 @@ public class ByteNestedSortingTests extends AbstractNumberNestedSortingTests {

    @Override
    protected IndexFieldData.XFieldComparatorSource createInnerFieldComparator(String fieldName, SortMode sortMode, Object missingValue) {
        ByteArrayIndexFieldData fieldData = getForField(fieldName);
        IndexNumericFieldData fieldData = getForField(fieldName);
        return new ByteValuesComparatorSource(fieldData, missingValue, sortMode);
    }

@@ -24,9 +24,9 @@ import org.apache.lucene.document.IntField;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.fieldcomparator.IntValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.plain.IntArrayIndexFieldData;

/**
 */

@@ -39,7 +39,7 @@ public class IntegerNestedSortingTests extends AbstractNumberNestedSortingTests

    @Override
    protected IndexFieldData.XFieldComparatorSource createInnerFieldComparator(String fieldName, SortMode sortMode, Object missingValue) {
        IntArrayIndexFieldData fieldData = getForField(fieldName);
        IndexNumericFieldData fieldData = getForField(fieldName);
        return new IntValuesComparatorSource(fieldData, missingValue, sortMode);
    }

@@ -26,7 +26,7 @@ import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.plain.LongArrayIndexFieldData;
import org.elasticsearch.index.fielddata.plain.PackedArrayIndexFieldData;

/**
 */

@@ -39,7 +39,7 @@ public class LongNestedSortingTests extends AbstractNumberNestedSortingTests {

    @Override
    protected IndexFieldData.XFieldComparatorSource createInnerFieldComparator(String fieldName, SortMode sortMode, Object missingValue) {
        LongArrayIndexFieldData fieldData = getForField(fieldName);
        PackedArrayIndexFieldData fieldData = getForField(fieldName);
        return new LongValuesComparatorSource(fieldData, missingValue, sortMode);
    }

@@ -24,9 +24,9 @@ import org.apache.lucene.document.IntField;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.fieldcomparator.ShortValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
import org.elasticsearch.index.fielddata.plain.ShortArrayIndexFieldData;

/**
 */

@@ -39,7 +39,7 @@ public class ShortNestedSortingTests extends AbstractNumberNestedSortingTests {

    @Override
    protected IndexFieldData.XFieldComparatorSource createInnerFieldComparator(String fieldName, SortMode sortMode, Object missingValue) {
        ShortArrayIndexFieldData fieldData = getForField(fieldName);
        IndexNumericFieldData fieldData = getForField(fieldName);
        return new ShortValuesComparatorSource(fieldData, missingValue, sortMode);
    }
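Taken together, the four nested-sorting hunks above make the same mechanical change: the comparator sources no longer depend on a concrete per-type field data class. A condensed view of the resulting wiring, assuming (as the byte, int and short hunks show) that the sources accept the common IndexNumericFieldData interface, which the new PackedArrayIndexFieldData in the long hunk also implements; all names below are taken from the hunks:

    IndexNumericFieldData fieldData = getForField(fieldName);
    // one backing implementation, four sort comparators
    new ByteValuesComparatorSource(fieldData, missingValue, sortMode);
    new ShortValuesComparatorSource(fieldData, missingValue, sortMode);
    new IntValuesComparatorSource(fieldData, missingValue, sortMode);
    new LongValuesComparatorSource(fieldData, missingValue, sortMode);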