From fc09ce7ac90066e6034496541ee32f0d55be0b9e Mon Sep 17 00:00:00 2001 From: uboness Date: Sun, 20 Jan 2013 16:06:07 +0100 Subject: [PATCH] Implemented int field data --- .../index/fielddata/ByteValues.java | 98 ++ .../index/fielddata/DoubleValues.java | 102 ++ .../fielddata/IndexFieldDataService.java | 7 +- .../index/fielddata/LongValues.java | 3 + .../index/fielddata/ShortValues.java | 98 ++ .../index/fielddata/StringValues.java | 95 ++ .../plain/IntArrayAtomicFieldData.java | 927 ++++++++++++++++++ .../plain/IntArrayIndexFieldData.java | 157 +++ .../index/fielddata/IntFieldDataTests.java | 107 ++ 9 files changed, 1590 insertions(+), 4 deletions(-) create mode 100644 src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayAtomicFieldData.java create mode 100644 src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayIndexFieldData.java create mode 100644 src/test/java/org/elasticsearch/test/unit/index/fielddata/IntFieldDataTests.java diff --git a/src/main/java/org/elasticsearch/index/fielddata/ByteValues.java b/src/main/java/org/elasticsearch/index/fielddata/ByteValues.java index 86d4e885936..cebabaeb3bc 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ByteValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ByteValues.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.index.fielddata.util.ByteArrayRef; +import org.elasticsearch.index.fielddata.util.IntArrayRef; import org.elasticsearch.index.fielddata.util.LongArrayRef; /** @@ -99,6 +100,103 @@ public interface ByteValues { } } + public static class IntBased implements ByteValues { + + private final IntValues values; + + private final ByteArrayRef arrayScratch = new ByteArrayRef(new byte[1], 1); + private final ValueIter iter = new ValueIter(); + private final Proc proc = new Proc(); + + public IntBased(IntValues values) { + this.values = values; + } + + @Override + public 
boolean isMultiValued() { + return values.isMultiValued(); + } + + @Override + public boolean hasValue(int docId) { + return values.hasValue(docId); + } + + @Override + public byte getValue(int docId) { + return (byte) values.getValue(docId); + } + + @Override + public byte getValueMissing(int docId, byte missingValue) { + return (byte) values.getValueMissing(docId, missingValue); + } + + @Override + public ByteArrayRef getValues(int docId) { + IntArrayRef arrayRef = values.getValues(docId); + int size = arrayRef.size(); + if (size == 0) { + return ByteArrayRef.EMPTY; + } + arrayScratch.reset(size); + for (int i = arrayRef.start; i < arrayRef.end; i++) { + arrayScratch.values[arrayScratch.end++] = (byte) arrayRef.values[i]; + } + return arrayScratch; + } + + @Override + public Iter getIter(int docId) { + return iter.reset(values.getIter(docId)); + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + values.forEachValueInDoc(docId, this.proc.reset(proc)); + } + + static class ValueIter implements Iter { + + private IntValues.Iter iter; + + public ValueIter reset(IntValues.Iter iter) { + this.iter = iter; + return this; + } + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public byte next() { + return (byte) iter.next(); + } + } + + static class Proc implements IntValues.ValueInDocProc { + + private ValueInDocProc proc; + + public Proc reset(ValueInDocProc proc) { + this.proc = proc; + return this; + } + + @Override + public void onValue(int docId, int value) { + proc.onValue(docId, (byte) value); + } + + @Override + public void onMissing(int docId) { + proc.onMissing(docId); + } + } + } + public static class LongBased implements ByteValues { private final LongValues values; diff --git a/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java b/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java index 3e3d595cb61..c7374d4f0a3 100644 --- 
a/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java @@ -21,6 +21,8 @@ package org.elasticsearch.index.fielddata; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.index.fielddata.util.DoubleArrayRef; +import org.elasticsearch.index.fielddata.util.IntArrayRef; +import org.elasticsearch.index.fielddata.util.LongArrayRef; /** */ @@ -97,4 +99,104 @@ public interface DoubleValues { } } } + + public static class LongBased implements DoubleValues { + + private final LongValues values; + private final DoubleArrayRef arrayScratch = new DoubleArrayRef(new double[1], 1); + private final ValueIter iter = new ValueIter(); + private final Proc proc = new Proc(); + + public LongBased(LongValues values) { + this.values = values; + } + + @Override + public boolean isMultiValued() { + return values.isMultiValued(); + } + + @Override + public boolean hasValue(int docId) { + return values.hasValue(docId); + } + + @Override + public double getValue(int docId) { + return (double) values.getValue(docId); + } + + @Override + public double getValueMissing(int docId, double missingValue) { + if (!values.hasValue(docId)) { + return missingValue; + } + return getValue(docId); + } + + @Override + public DoubleArrayRef getValues(int docId) { + LongArrayRef arrayRef = values.getValues(docId); + int size = arrayRef.size(); + if (size == 0) { + return DoubleArrayRef.EMPTY; + } + arrayScratch.reset(size); + for (int i = arrayRef.start; i < arrayRef.end; i++) { + arrayScratch.values[arrayScratch.end++] = (double) arrayRef.values[i]; + } + return arrayScratch; + } + + @Override + public Iter getIter(int docId) { + return this.iter.reset(values.getIter(docId)); + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + values.forEachValueInDoc(docId, this.proc.reset(proc)); + } + + static class ValueIter implements Iter { + + private LongValues.Iter 
iter; + + private ValueIter reset(LongValues.Iter iter) { + this.iter = iter; + return this; + } + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public double next() { + return (double) iter.next(); + } + } + + static class Proc implements LongValues.ValueInDocProc { + + private ValueInDocProc proc; + + private Proc reset(ValueInDocProc proc) { + this.proc = proc; + return this; + } + + @Override + public void onValue(int docId, long value) { + this.proc.onValue(docId, (double) value); + } + + @Override + public void onMissing(int docId) { + this.proc.onMissing(docId); + } + } + + } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java b/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java index ed6b277b588..bf1775735f6 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java +++ b/src/main/java/org/elasticsearch/index/fielddata/IndexFieldDataService.java @@ -29,10 +29,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.index.AbstractIndexComponent; import org.elasticsearch.index.Index; -import org.elasticsearch.index.fielddata.plain.ConcreteBytesRefIndexFieldData; -import org.elasticsearch.index.fielddata.plain.DoubleArrayIndexFieldData; -import org.elasticsearch.index.fielddata.plain.GeoPointDoubleArrayIndexFieldData; -import org.elasticsearch.index.fielddata.plain.LongArrayIndexFieldData; +import org.elasticsearch.index.fielddata.plain.*; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.settings.IndexSettings; @@ -51,6 +48,7 @@ public class IndexFieldDataService extends AbstractIndexComponent { .put("double", new DoubleArrayIndexFieldData.Builder()) .put("long", new LongArrayIndexFieldData.Builder()) .put("geo_point", new GeoPointDoubleArrayIndexFieldData.Builder()) + .put("int", new 
IntArrayIndexFieldData.Builder()) .immutableMap(); buildersByTypeAndFormat = MapBuilder., IndexFieldData.Builder>newMapBuilder() @@ -58,6 +56,7 @@ public class IndexFieldDataService extends AbstractIndexComponent { .put(Tuple.tuple("double", "array"), new DoubleArrayIndexFieldData.Builder()) .put(Tuple.tuple("long", "array"), new LongArrayIndexFieldData.Builder()) .put(Tuple.tuple("geo_point", "array"), new GeoPointDoubleArrayIndexFieldData.Builder()) + .put(Tuple.tuple("int", "array"), new IntArrayIndexFieldData.Builder()) .immutableMap(); } diff --git a/src/main/java/org/elasticsearch/index/fielddata/LongValues.java b/src/main/java/org/elasticsearch/index/fielddata/LongValues.java index f6de0d87233..d275a7329f5 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/LongValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/LongValues.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.fielddata; import org.elasticsearch.ElasticSearchIllegalStateException; +import org.elasticsearch.index.fielddata.util.IntArrayRef; import org.elasticsearch.index.fielddata.util.LongArrayRef; /** @@ -47,6 +48,7 @@ public interface LongValues { void forEachValueInDoc(int docId, ValueInDocProc proc); static interface ValueInDocProc { + void onValue(int docId, long value); void onMissing(int docId); @@ -97,4 +99,5 @@ public interface LongValues { } } } + } diff --git a/src/main/java/org/elasticsearch/index/fielddata/ShortValues.java b/src/main/java/org/elasticsearch/index/fielddata/ShortValues.java index 43de95e1369..0346ff78377 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ShortValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ShortValues.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.fielddata; import org.elasticsearch.ElasticSearchIllegalStateException; +import org.elasticsearch.index.fielddata.util.IntArrayRef; import org.elasticsearch.index.fielddata.util.LongArrayRef; import 
org.elasticsearch.index.fielddata.util.ShortArrayRef; @@ -99,6 +100,103 @@ public interface ShortValues { } } + public static class IntBased implements ShortValues { + + private final IntValues values; + + private final ShortArrayRef arrayScratch = new ShortArrayRef(new short[1], 1); + private final ValueIter iter = new ValueIter(); + private final Proc proc = new Proc(); + + public IntBased(IntValues values) { + this.values = values; + } + + @Override + public boolean isMultiValued() { + return values.isMultiValued(); + } + + @Override + public boolean hasValue(int docId) { + return values.hasValue(docId); + } + + @Override + public short getValue(int docId) { + return (short) values.getValue(docId); + } + + @Override + public short getValueMissing(int docId, short missingValue) { + return (short) values.getValueMissing(docId, missingValue); + } + + @Override + public ShortArrayRef getValues(int docId) { + IntArrayRef arrayRef = values.getValues(docId); + int size = arrayRef.size(); + if (size == 0) { + return ShortArrayRef.EMPTY; + } + arrayScratch.reset(size); + for (int i = arrayRef.start; i < arrayRef.end; i++) { + arrayScratch.values[arrayScratch.end++] = (short) arrayRef.values[i]; + } + return arrayScratch; + } + + @Override + public Iter getIter(int docId) { + return iter.reset(values.getIter(docId)); + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + values.forEachValueInDoc(docId, this.proc.reset(proc)); + } + + static class ValueIter implements Iter { + + private IntValues.Iter iter; + + public ValueIter reset(IntValues.Iter iter) { + this.iter = iter; + return this; + } + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public short next() { + return (short) iter.next(); + } + } + + static class Proc implements IntValues.ValueInDocProc { + + private ValueInDocProc proc; + + public Proc reset(ValueInDocProc proc) { + this.proc = proc; + return this; + } + + @Override + public void 
onValue(int docId, int value) { + proc.onValue(docId, (short) value); + } + + @Override + public void onMissing(int docId) { + proc.onMissing(docId); + } + } + } + public static class LongBased implements ShortValues { private final LongValues values; diff --git a/src/main/java/org/elasticsearch/index/fielddata/StringValues.java b/src/main/java/org/elasticsearch/index/fielddata/StringValues.java index d017568f328..56f1a5e08a4 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/StringValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/StringValues.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.fielddata; import org.elasticsearch.ElasticSearchIllegalStateException; +import org.elasticsearch.index.fielddata.util.LongArrayRef; import org.elasticsearch.index.fielddata.util.StringArrayRef; /** @@ -99,4 +100,98 @@ public interface StringValues { } } } + + public static class LongBased implements StringValues { + + private final LongValues values; + + private final StringArrayRef arrayScratch = new StringArrayRef(new String[1], 1); + private final ValuesIter valuesIter = new ValuesIter(); + private final Proc proc = new Proc(); + + public LongBased(LongValues values) { + this.values = values; + } + + @Override + public boolean isMultiValued() { + return values.isMultiValued(); + } + + @Override + public boolean hasValue(int docId) { + return values.hasValue(docId); + } + + @Override + public String getValue(int docId) { + if (!values.hasValue(docId)) { + return null; + } + return Long.toString(values.getValue(docId)); + } + + @Override + public StringArrayRef getValues(int docId) { + LongArrayRef arrayRef = values.getValues(docId); + int size = arrayRef.size(); + if (size == 0) return StringArrayRef.EMPTY; + + arrayScratch.reset(size); + for (int i = arrayRef.start; i < arrayRef.end; i++) { + arrayScratch.values[arrayScratch.end++] = Long.toString(arrayRef.values[i]); + } + return arrayScratch; + } + + @Override + public Iter getIter(int 
docId) { + return valuesIter.reset(values.getIter(docId)); + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + values.forEachValueInDoc(docId, this.proc.reset(proc)); + } + + static class ValuesIter implements Iter { + + private LongValues.Iter iter; + + private ValuesIter reset(LongValues.Iter iter) { + this.iter = iter; + return this; + } + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public String next() { + return Long.toString(iter.next()); + } + } + + static class Proc implements LongValues.ValueInDocProc { + + private ValueInDocProc proc; + + private Proc reset(ValueInDocProc proc) { + this.proc = proc; + return this; + } + + @Override + public void onValue(int docId, long value) { + proc.onValue(docId, Long.toString(value)); + } + + @Override + public void onMissing(int docId) { + proc.onMissing(docId); + } + } + } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayAtomicFieldData.java new file mode 100644 index 00000000000..774faefb941 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayAtomicFieldData.java @@ -0,0 +1,927 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata.plain; + +import org.apache.lucene.util.FixedBitSet; +import org.elasticsearch.common.RamUsage; +import org.elasticsearch.index.fielddata.*; +import org.elasticsearch.index.fielddata.ordinals.Ordinals; +import org.elasticsearch.index.fielddata.util.DoubleArrayRef; +import org.elasticsearch.index.fielddata.util.IntArrayRef; +import org.elasticsearch.index.fielddata.util.LongArrayRef; + +/** + */ +public abstract class IntArrayAtomicFieldData implements AtomicNumericFieldData { + + protected final int[] values; + private final int numDocs; + + protected long size = -1; + + public IntArrayAtomicFieldData(int[] values, int numDocs) { + this.values = values; + this.numDocs = numDocs; + } + + @Override + public int getNumDocs() { + return numDocs; + } + + public static class WithOrdinals extends IntArrayAtomicFieldData { + + private final Ordinals ordinals; + + public WithOrdinals(int[] values, int numDocs, Ordinals ordinals) { + super(values, numDocs); + this.ordinals = ordinals; + } + + @Override + public boolean isMultiValued() { + return ordinals.isMultiValued(); + } + + @Override + public boolean isValuesOrdered() { + return true; + } + + @Override + public long getMemorySizeInBytes() { + if (size == -1) { + size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + +RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_INT) + ordinals.getMemorySizeInBytes(); + } + return size; + } + + @Override + public BytesValues getBytesValues() { + return new BytesValues.StringBased(getStringValues()); + } + + @Override + public HashedBytesValues getHashedBytesValues() { + return new HashedBytesValues.StringBased(getStringValues()); + } + + @Override + public StringValues getStringValues() { + return new StringValues.LongBased(getLongValues()); + } + + @Override + public ScriptDocValues 
getScriptValues() { + return new ScriptDocValues.NumericInteger(getIntValues()); + } + + @Override + public ByteValues getByteValues() { + return new ByteValues.LongBased(getLongValues()); + } + + @Override + public ShortValues getShortValues() { + return new ShortValues.LongBased(getLongValues()); + } + + @Override + public IntValues getIntValues() { + return new IntValues(values, ordinals.ordinals()); + } + + @Override + public LongValues getLongValues() { + return new LongValues(values, ordinals.ordinals()); + } + + @Override + public FloatValues getFloatValues() { + return new FloatValues.DoubleBased(getDoubleValues()); + } + + @Override + public DoubleValues getDoubleValues() { + return new DoubleValues(values, ordinals.ordinals()); + } + + static class IntValues implements org.elasticsearch.index.fielddata.IntValues { + + private final int[] values; + private final Ordinals.Docs ordinals; + + private final IntArrayRef arrayScratch = new IntArrayRef(new int[1], 1); + private final ValuesIter iter; + + IntValues(int[] values, Ordinals.Docs ordinals) { + this.values = values; + this.ordinals = ordinals; + this.iter = new ValuesIter(values); + } + + @Override + public boolean isMultiValued() { + return ordinals.isMultiValued(); + } + + @Override + public boolean hasValue(int docId) { + return ordinals.getOrd(docId) != 0; + } + + @Override + public int getValue(int docId) { + return values[ordinals.getOrd(docId)]; + } + + @Override + public int getValueMissing(int docId, int missingValue) { + int ord = ordinals.getOrd(docId); + if (ord == 0) { + return missingValue; + } else { + return values[ord]; + } + } + + @Override + public IntArrayRef getValues(int docId) { + IntArrayRef ords = ordinals.getOrds(docId); + int size = ords.size(); + if (size == 0) return IntArrayRef.EMPTY; + + arrayScratch.reset(size); + for (int i = ords.start; i < ords.end; i++) { + arrayScratch.values[arrayScratch.end++] = values[ords.values[i]]; + } + return arrayScratch; + } + + @Override 
+ public Iter getIter(int docId) { + return iter.reset(ordinals.getIter(docId)); + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + Ordinals.Docs.Iter iter = ordinals.getIter(docId); + int ord = iter.next(); + if (ord == 0) { + proc.onMissing(docId); + return; + } + do { + proc.onValue(docId, values[ord]); + } while ((ord = iter.next()) != 0); + } + + static class ValuesIter implements Iter { + + private final int[] values; + private Ordinals.Docs.Iter ordsIter; + private int ord; + + ValuesIter(int[] values) { + this.values = values; + } + + public ValuesIter reset(Ordinals.Docs.Iter ordsIter) { + this.ordsIter = ordsIter; + this.ord = ordsIter.next(); + return this; + } + + @Override + public boolean hasNext() { + return ord != 0; + } + + @Override + public int next() { + int value = values[ord]; + ord = ordsIter.next(); + return value; + } + } + } + + static class LongValues implements org.elasticsearch.index.fielddata.LongValues { + + private final int[] values; + private final Ordinals.Docs ordinals; + + private final LongArrayRef arrayScratch = new LongArrayRef(new long[1], 1); + private final ValuesIter iter; + + LongValues(int[] values, Ordinals.Docs ordinals) { + this.values = values; + this.ordinals = ordinals; + this.iter = new ValuesIter(values); + } + + @Override + public boolean isMultiValued() { + return ordinals.isMultiValued(); + } + + @Override + public boolean hasValue(int docId) { + return ordinals.getOrd(docId) != 0; + } + + @Override + public long getValue(int docId) { + return (long) values[ordinals.getOrd(docId)]; + } + + @Override + public long getValueMissing(int docId, long missingValue) { + int ord = ordinals.getOrd(docId); + if (ord == 0) { + return missingValue; + } else { + return (long) values[ord]; + } + } + + @Override + public LongArrayRef getValues(int docId) { + IntArrayRef ords = ordinals.getOrds(docId); + int size = ords.size(); + if (size == 0) return LongArrayRef.EMPTY; + + 
arrayScratch.reset(size); + for (int i = ords.start; i < ords.end; i++) { + arrayScratch.values[arrayScratch.end++] = (long) values[ords.values[i]]; + } + return arrayScratch; + } + + @Override + public Iter getIter(int docId) { + return iter.reset(ordinals.getIter(docId)); + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + Ordinals.Docs.Iter iter = ordinals.getIter(docId); + int ord = iter.next(); + if (ord == 0) { + proc.onMissing(docId); + return; + } + do { + proc.onValue(docId, (long) values[ord]); + } while ((ord = iter.next()) != 0); + } + + static class ValuesIter implements Iter { + + private final int[] values; + private Ordinals.Docs.Iter ordsIter; + private int ord; + + ValuesIter(int[] values) { + this.values = values; + } + + public ValuesIter reset(Ordinals.Docs.Iter ordsIter) { + this.ordsIter = ordsIter; + this.ord = ordsIter.next(); + return this; + } + + @Override + public boolean hasNext() { + return ord != 0; + } + + @Override + public long next() { + int value = values[ord]; + ord = ordsIter.next(); + return (long) value; + } + } + } + + static class DoubleValues implements org.elasticsearch.index.fielddata.DoubleValues { + + private final int[] values; + private final Ordinals.Docs ordinals; + + private final DoubleArrayRef arrayScratch = new DoubleArrayRef(new double[1], 1); + private final ValuesIter iter; + + DoubleValues(int[] values, Ordinals.Docs ordinals) { + this.values = values; + this.ordinals = ordinals; + this.iter = new ValuesIter(values); + } + + @Override + public boolean isMultiValued() { + return ordinals.isMultiValued(); + } + + @Override + public boolean hasValue(int docId) { + return ordinals.getOrd(docId) != 0; + } + + @Override + public double getValue(int docId) { + return (double) values[ordinals.getOrd(docId)]; + } + + @Override + public double getValueMissing(int docId, double missingValue) { + int ord = ordinals.getOrd(docId); + if (ord == 0) { + return missingValue; + } else { + 
return (double) values[ord]; + } + } + + @Override + public DoubleArrayRef getValues(int docId) { + IntArrayRef ords = ordinals.getOrds(docId); + int size = ords.size(); + if (size == 0) return DoubleArrayRef.EMPTY; + + arrayScratch.reset(size); + for (int i = ords.start; i < ords.end; i++) { + arrayScratch.values[arrayScratch.end++] = (double) values[ords.values[i]]; + } + return arrayScratch; + } + + @Override + public Iter getIter(int docId) { + return iter.reset(ordinals.getIter(docId)); + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + Ordinals.Docs.Iter iter = ordinals.getIter(docId); + int ord = iter.next(); + if (ord == 0) { + proc.onMissing(docId); + return; + } + do { + proc.onValue(docId, (double) values[ord]); + } while ((ord = iter.next()) != 0); + } + + static class ValuesIter implements Iter { + + private final int[] values; + private Ordinals.Docs.Iter ordsIter; + private int ord; + + ValuesIter(int[] values) { + this.values = values; + } + + public ValuesIter reset(Ordinals.Docs.Iter ordsIter) { + this.ordsIter = ordsIter; + this.ord = ordsIter.next(); + return this; + } + + @Override + public boolean hasNext() { + return ord != 0; + } + + @Override + public double next() { + int value = values[ord]; + ord = ordsIter.next(); + return (double) value; + } + } + } + } + + /** + * A single valued case, where not all values are "set", so we have a FixedBitSet that + * indicates which values have an actual value. 
+     */
+    public static class SingleFixedSet extends IntArrayAtomicFieldData {
+
+        private final FixedBitSet set;
+
+        public SingleFixedSet(int[] values, int numDocs, FixedBitSet set) {
+            super(values, numDocs);
+            this.set = set;
+        }
+
+        @Override
+        public boolean isMultiValued() {
+            return false;
+        }
+
+        @Override
+        public boolean isValuesOrdered() {
+            return false;
+        }
+
+        /**
+         * Lazily computes and caches the estimated footprint: the array header, one int per
+         * entry of the values array, and the longs backing the bit set.
+         */
+        @Override
+        public long getMemorySizeInBytes() {
+            if (size == -1) {
+                /* the backing array is an int[], so it is sized with NUM_BYTES_INT */
+                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_INT) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
+            }
+            return size;
+        }
+
+        /**
+         * Exposes the values to scripts as integers, the same view the ordinals based
+         * implementation of this field data returns.
+         */
+        @Override
+        public ScriptDocValues getScriptValues() {
+            return new ScriptDocValues.NumericInteger(getIntValues());
+        }
+
+        @Override
+        public BytesValues getBytesValues() {
+            return new BytesValues.StringBased(getStringValues());
+        }
+
+        @Override
+        public HashedBytesValues getHashedBytesValues() {
+            return new HashedBytesValues.StringBased(getStringValues());
+        }
+
+        @Override
+        public StringValues getStringValues() {
+            return new StringValues.LongBased(getLongValues());
+        }
+
+        @Override
+        public ByteValues getByteValues() {
+            return new ByteValues.LongBased(getLongValues());
+        }
+
+        @Override
+        public ShortValues getShortValues() {
+            return new ShortValues.LongBased(getLongValues());
+        }
+
+        @Override
+        public IntValues getIntValues() {
+            return new IntValues(values, set);
+        }
+
+        @Override
+        public LongValues getLongValues() {
+            return new LongValues(values, set);
+        }
+
+        @Override
+        public FloatValues getFloatValues() {
+            return new FloatValues.DoubleBased(getDoubleValues());
+        }
+
+        @Override
+        public DoubleValues getDoubleValues() {
+            return new DoubleValues(values, set);
+        }
+
+        /**
+         * Int view over the values array; the bit set tells which docs have a value.
+         */
+        static class IntValues implements org.elasticsearch.index.fielddata.IntValues {
+
+            private final int[] values;
+            private final FixedBitSet set;
+
+            private final IntArrayRef arrayScratch = new IntArrayRef(new int[1], 1);
+            private final Iter.Single iter = new Iter.Single();
+
+            IntValues(int[] values, FixedBitSet set) {
+                this.values = values;
+                this.set = set;
+            }
+
+            @Override
+            public boolean isMultiValued() {
+                return false;
+            }
+
+            @Override
+            public boolean hasValue(int docId) {
+                return set.get(docId);
+            }
+
+            @Override
+            public int getValue(int docId) {
+                return values[docId];
+            }
+
+            @Override
+            public int getValueMissing(int docId, int missingValue) {
+                if (set.get(docId)) {
+                    return values[docId];
+                } else {
+                    return missingValue;
+                }
+            }
+
+            @Override
+            public IntArrayRef getValues(int docId) {
+                if (set.get(docId)) {
+                    arrayScratch.values[0] = values[docId];
+                    return arrayScratch;
+                } else {
+                    return IntArrayRef.EMPTY;
+                }
+            }
+
+            @Override
+            public Iter getIter(int docId) {
+                if (set.get(docId)) {
+                    return iter.reset(values[docId]);
+                } else {
+                    return Iter.Empty.INSTANCE;
+                }
+            }
+
+            /**
+             * Passes the doc's value to the callback, or reports the doc as missing when
+             * its bit is not set.
+             */
+            @Override
+            public void forEachValueInDoc(int docId, ValueInDocProc proc) {
+                if (set.get(docId)) {
+                    proc.onValue(docId, values[docId]);
+                } else {
+                    proc.onMissing(docId);
+                }
+            }
+        }
+
+        /**
+         * Long view over the values array; each int is widened to long on read.
+         */
+        static class LongValues implements org.elasticsearch.index.fielddata.LongValues {
+
+            private final int[] values;
+            private final FixedBitSet set;
+
+            private final LongArrayRef arrayScratch = new LongArrayRef(new long[1], 1);
+            private final Iter.Single iter = new Iter.Single();
+
+            LongValues(int[] values, FixedBitSet set) {
+                this.values = values;
+                this.set = set;
+            }
+
+            @Override
+            public boolean isMultiValued() {
+                return false;
+            }
+
+            @Override
+            public boolean hasValue(int docId) {
+                return set.get(docId);
+            }
+
+            @Override
+            public long getValue(int docId) {
+                return (long) values[docId];
+            }
+
+            @Override
+            public long getValueMissing(int docId, long missingValue) {
+                if (set.get(docId)) {
+                    return (long) values[docId];
+                } else {
+                    return missingValue;
+                }
+            }
+
+            @Override
+            public LongArrayRef getValues(int docId) {
+                if (set.get(docId)) {
+                    arrayScratch.values[0] = (long) values[docId];
+                    return arrayScratch;
+                } else {
+                    return LongArrayRef.EMPTY;
+                }
+            }
+
+            @Override
+            public Iter getIter(int docId) {
+                if (set.get(docId)) {
+                    return iter.reset((long) values[docId]);
+                } else {
+                    return Iter.Empty.INSTANCE;
+                }
+            }
+
+            /**
+             * Passes the doc's value to the callback, or reports the doc as missing when
+             * its bit is not set.
+             */
+            @Override
+            public void forEachValueInDoc(int docId, ValueInDocProc proc) {
+                if (set.get(docId)) {
+                    proc.onValue(docId, (long) values[docId]);
+                } else {
+                    proc.onMissing(docId);
+                }
+            }
+        }
+
+        /**
+         * Double view over the values array; each int is widened to double on read.
+         */
+        static class DoubleValues implements org.elasticsearch.index.fielddata.DoubleValues {
+
+            private final int[] values;
+            private final FixedBitSet set;
+
+            private final DoubleArrayRef arrayScratch = new DoubleArrayRef(new double[1], 1);
+            private final Iter.Single iter = new Iter.Single();
+
+            DoubleValues(int[] values, FixedBitSet set) {
+                this.values = values;
+                this.set = set;
+            }
+
+            @Override
+            public boolean isMultiValued() {
+                return false;
+            }
+
+            @Override
+            public boolean hasValue(int docId) {
+                return set.get(docId);
+            }
+
+            @Override
+            public double getValue(int docId) {
+                return (double) values[docId];
+            }
+
+            @Override
+            public double getValueMissing(int docId, double missingValue) {
+                if (set.get(docId)) {
+                    return (double) values[docId];
+                } else {
+                    return missingValue;
+                }
+            }
+
+            @Override
+            public DoubleArrayRef getValues(int docId) {
+                if (set.get(docId)) {
+                    arrayScratch.values[0] = (double) values[docId];
+                    return arrayScratch;
+                } else {
+                    return DoubleArrayRef.EMPTY;
+                }
+            }
+
+            @Override
+            public Iter getIter(int docId) {
+                if (set.get(docId)) {
+                    return iter.reset((double) values[docId]);
+                } else {
+                    return Iter.Empty.INSTANCE;
+                }
+            }
+
+            /**
+             * Passes the doc's value to the callback, or reports the doc as missing when
+             * its bit is not set.
+             */
+            @Override
+            public void forEachValueInDoc(int docId, ValueInDocProc proc) {
+                if (set.get(docId)) {
+                    proc.onValue(docId, (double) values[docId]);
+                } else {
+                    proc.onMissing(docId);
+                }
+            }
+        }
+    }
+
+    /**
+     * Assumes all the values are "set", and docId is used as the index to the value array.
+     */
+    public static class Single extends IntArrayAtomicFieldData {
+
+        /**
+         * Note, here, we assume that there is no offset by 1 from docId, so position 0
+         * is the value for docId 0.
+ */ + public Single(int[] values, int numDocs) { + super(values, numDocs); + } + + @Override + public boolean isMultiValued() { + return false; + } + + @Override + public boolean isValuesOrdered() { + return false; + } + + @Override + public long getMemorySizeInBytes() { + if (size == -1) { + size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE); + } + return size; + } + + @Override + public ScriptDocValues getScriptValues() { + return new ScriptDocValues.NumericDouble(getDoubleValues()); + } + + @Override + public BytesValues getBytesValues() { + return new BytesValues.StringBased(getStringValues()); + } + + @Override + public HashedBytesValues getHashedBytesValues() { + return new HashedBytesValues.StringBased(getStringValues()); + } + + @Override + public StringValues getStringValues() { + return new StringValues.LongBased(getLongValues()); + } + + @Override + public ByteValues getByteValues() { + return new ByteValues.LongBased(getLongValues()); + } + + @Override + public ShortValues getShortValues() { + return new ShortValues.LongBased(getLongValues()); + } + + @Override + public IntValues getIntValues() { + return new IntValues(values); + } + + @Override + public LongValues getLongValues() { + return new LongValues(values); + } + + @Override + public FloatValues getFloatValues() { + return new FloatValues.DoubleBased(getDoubleValues()); + } + + @Override + public DoubleValues getDoubleValues() { + return new DoubleValues(values); + } + + static class IntValues implements org.elasticsearch.index.fielddata.IntValues { + + private final int[] values; + + private final IntArrayRef arrayScratch = new IntArrayRef(new int[1], 1); + private final Iter.Single iter = new Iter.Single(); + + IntValues(int[] values) { + this.values = values; + } + + @Override + public boolean isMultiValued() { + return false; + } + + @Override + public boolean hasValue(int docId) { + return true; + } + + @Override + public int getValue(int docId) { + return 
values[docId]; + } + + @Override + public int getValueMissing(int docId, int missingValue) { + return values[docId]; + } + + @Override + public IntArrayRef getValues(int docId) { + arrayScratch.values[0] = values[docId]; + return arrayScratch; + } + + @Override + public Iter getIter(int docId) { + return iter.reset(values[docId]); + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + proc.onValue(docId, values[docId]); + } + } + + static class LongValues implements org.elasticsearch.index.fielddata.LongValues { + + private final int[] values; + + private final LongArrayRef arrayScratch = new LongArrayRef(new long[1], 1); + private final Iter.Single iter = new Iter.Single(); + + LongValues(int[] values) { + this.values = values; + } + + @Override + public boolean isMultiValued() { + return false; + } + + @Override + public boolean hasValue(int docId) { + return true; + } + + @Override + public long getValue(int docId) { + return (long) values[docId]; + } + + @Override + public long getValueMissing(int docId, long missingValue) { + return (long) values[docId]; + } + + @Override + public LongArrayRef getValues(int docId) { + arrayScratch.values[0] = (long) values[docId]; + return arrayScratch; + } + + @Override + public Iter getIter(int docId) { + return iter.reset((long) values[docId]); + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + proc.onValue(docId, (long) values[docId]); + } + } + + static class DoubleValues implements org.elasticsearch.index.fielddata.DoubleValues { + + private final int[] values; + + private final DoubleArrayRef arrayScratch = new DoubleArrayRef(new double[1], 1); + private final Iter.Single iter = new Iter.Single(); + + DoubleValues(int[] values) { + this.values = values; + } + + @Override + public boolean isMultiValued() { + return false; + } + + @Override + public boolean hasValue(int docId) { + return true; + } + + @Override + public double getValue(int docId) { + return 
values[docId]; + } + + @Override + public double getValueMissing(int docId, double missingValue) { + return (double) values[docId]; + } + + @Override + public DoubleArrayRef getValues(int docId) { + arrayScratch.values[0] = (double) values[docId]; + return arrayScratch; + } + + @Override + public Iter getIter(int docId) { + return iter.reset((double) values[docId]); + } + + @Override + public void forEachValueInDoc(int docId, ValueInDocProc proc) { + proc.onValue(docId, (double) values[docId]); + } + } + + } +} diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayIndexFieldData.java new file mode 100644 index 00000000000..f42baad69a0 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/IntArrayIndexFieldData.java @@ -0,0 +1,157 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.elasticsearch.index.fielddata.plain;
+
+import gnu.trove.list.array.TIntArrayList;
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.elasticsearch.ElasticSearchException;
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.Index;
+import org.elasticsearch.index.fielddata.*;
+import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
+import org.elasticsearch.index.fielddata.ordinals.MultiFlatArrayOrdinals;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.settings.IndexSettings;
+
+import java.util.ArrayList;
+
+/**
+ * Index-level field data for int fields. Per-segment data is built in
+ * {@link #loadDirect(AtomicReaderContext)} by walking the field's terms and is
+ * served through the cache by {@link #load(AtomicReaderContext)}.
+ */
+public class IntArrayIndexFieldData extends AbstractIndexFieldData<IntArrayAtomicFieldData> implements IndexNumericFieldData<IntArrayAtomicFieldData> {
+
+    /** Factory that creates {@link IntArrayIndexFieldData} instances. */
+    public static class Builder implements IndexFieldData.Builder {
+
+        @Override
+        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
+            return new IntArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
+        }
+    }
+
+    public IntArrayIndexFieldData(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType fieldDataType, IndexFieldDataCache cache) {
+        super(index, indexSettings, fieldNames, fieldDataType, cache);
+    }
+
+    @Override
+    public NumericType getNumericType() {
+        return NumericType.INT;
+    }
+
+    @Override
+    public boolean valuesOrdered() {
+        // because we might have single values? we can dynamically update a flag to reflect that
+        // based on the atomic field data loaded
+        return false;
+    }
+
+    /**
+     * Loads the segment's field data through the cache.
+     *
+     * @throws ElasticSearchException rethrown as-is when the cache throws one; any other
+     *                                throwable is wrapped with its cause preserved
+     */
+    @Override
+    public IntArrayAtomicFieldData load(AtomicReaderContext context) {
+        try {
+            return cache.load(context, this);
+        } catch (Throwable e) {
+            if (e instanceof ElasticSearchException) {
+                throw (ElasticSearchException) e;
+            } else {
+                throw new ElasticSearchException(e.getMessage(), e);
+            }
+        }
+    }
+
+    /**
+     * Builds the field data for one segment by iterating every term of the field.
+     * <p>
+     * Returns an empty {@code Single} when the segment has no terms for the field, a
+     * {@code Single} / {@code SingleFixedSet} when no document carries more than one
+     * value, and a {@code WithOrdinals} otherwise.
+     */
+    @Override
+    public IntArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception {
+        AtomicReader reader = context.reader();
+
+        Terms terms = reader.terms(getFieldNames().indexName());
+        if (terms == null) {
+            return new IntArrayAtomicFieldData.Single(new int[0], 0);
+        }
+
+        final int maxDoc = reader.maxDoc();
+
+        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
+        final TIntArrayList values = new TIntArrayList();
+        // ordinals.get(n)[docId] is the term ordinal of the (n+1)-th value seen for docId; 0 means "none".
+        ArrayList<int[]> ordinals = new ArrayList<int[]>();
+        // idx[docId] counts how many values have been recorded for docId so far.
+        int[] idx = new int[maxDoc];
+        ordinals.add(new int[maxDoc]);
+
+        values.add(0); // first "t" indicates null value
+        int termOrd = 1; // current term number
+
+        TermsEnum termsEnum = terms.iterator(null);
+        try {
+            DocsEnum docsEnum = null;
+            for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
+                values.add(FieldCache.NUMERIC_UTILS_INT_PARSER.parseInt(term));
+                docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
+                for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
+                    int[] ordinal;
+                    if (idx[docId] >= ordinals.size()) {
+                        // This doc has more values than any doc before it: open a new ordinal level.
+                        ordinal = new int[maxDoc];
+                        ordinals.add(ordinal);
+                    } else {
+                        ordinal = ordinals.get(idx[docId]);
+                    }
+                    ordinal[docId] = termOrd;
+                    idx[docId]++;
+                }
+                termOrd++;
+            }
+        } catch (RuntimeException e) {
+            if (e.getClass().getName().endsWith("StopFillCacheException")) {
+                // all is well, in case numeric parsers are used.
+            } else {
+                throw e;
+            }
+        }
+
+        if (ordinals.size() == 1) {
+            // No doc has more than one value: resolve ordinals into a flat per-doc value array.
+            int[] nativeOrdinals = ordinals.get(0);
+            FixedBitSet set = new FixedBitSet(maxDoc);
+            int[] sValues = new int[maxDoc];
+            boolean allHaveValue = true;
+            for (int i = 0; i < nativeOrdinals.length; i++) {
+                int nativeOrdinal = nativeOrdinals[i];
+                if (nativeOrdinal == 0) {
+                    allHaveValue = false;
+                } else {
+                    set.set(i);
+                    sValues[i] = values.get(nativeOrdinal);
+                }
+            }
+            if (allHaveValue) {
+                return new IntArrayAtomicFieldData.Single(sValues, maxDoc);
+            } else {
+                return new IntArrayAtomicFieldData.SingleFixedSet(sValues, maxDoc, set);
+            }
+        } else {
+            int[][] nativeOrdinals = new int[ordinals.size()][];
+            for (int i = 0; i < nativeOrdinals.length; i++) {
+                nativeOrdinals[i] = ordinals.get(i);
+            }
+            return new IntArrayAtomicFieldData.WithOrdinals(values.toArray(new int[values.size()]), maxDoc, new MultiFlatArrayOrdinals(nativeOrdinals, termOrd));
+        }
+    }
+
+    /** Sorting delegates to the double-based comparator source. */
+    @Override
+    public XFieldComparatorSource comparatorSource(@Nullable Object missingValue) {
+        return new DoubleValuesComparatorSource(this, missingValue);
+    }
+}
diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/IntFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/IntFieldDataTests.java
new file mode 100644
index 00000000000..a2b7955cd3e
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/unit/index/fielddata/IntFieldDataTests.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.test.unit.index.fielddata;
+
+import org.apache.lucene.document.*;
+import org.elasticsearch.index.fielddata.FieldDataType;
+
+/**
+ * Runs the shared numeric field data tests against the "int" field data type,
+ * indexing the fixtures with {@link IntField}.
+ */
+public class IntFieldDataTests extends NumericFieldDataTests {
+
+    @Override
+    protected FieldDataType getFieldDataType() {
+        return new FieldDataType("int");
+    }
+
+    /** Three documents with one value each. */
+    @Override
+    protected void fillSingleValueAllSet() throws Exception {
+        index("1", 2);
+        index("2", 1);
+        index("3", 3);
+    }
+
+    /** Three documents with at most one value each; document "2" has none. */
+    @Override
+    protected void fillSingleValueWithMissing() throws Exception {
+        index("1", 2);
+        index("2"); // deliberately indexed without a "value" field
+        index("3", 3);
+    }
+
+    /** Three documents that all have a value; document "1" has two. */
+    @Override
+    protected void fillMultiValueAllSet() throws Exception {
+        index("1", 2, 4);
+        index("2", 1);
+        index("3", 3);
+    }
+
+    /** Document "1" has two values, document "2" has none, document "3" has one. */
+    @Override
+    protected void fillMultiValueWithMissing() throws Exception {
+        index("1", 2, 4);
+        index("2"); // deliberately indexed without a "value" field
+        index("3", 3);
+    }
+
+    /**
+     * Adds one document to the writer: an unstored "_id" string field followed by one
+     * unstored "value" int field per entry of {@code fieldValues}, in the given order.
+     */
+    private void index(String id, int... fieldValues) throws Exception {
+        Document doc = new Document();
+        doc.add(new StringField("_id", id, Field.Store.NO));
+        for (int fieldValue : fieldValues) {
+            doc.add(new IntField("value", fieldValue, Field.Store.NO));
+        }
+        writer.addDocument(doc);
+    }
+}