diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/common/Numbers.java b/modules/elasticsearch/src/main/java/org/elasticsearch/common/Numbers.java index ced1cb6f5c1..832fbede18d 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/common/Numbers.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/common/Numbers.java @@ -37,7 +37,7 @@ public final class Numbers { * @return The int converted */ public static short bytesToShort(byte[] arr) { - return (short) (((arr[2] & 0xff) << 8) | (arr[3] & 0xff)); + return (short) (((arr[0] & 0xff) << 8) | (arr[1] & 0xff)); } /** @@ -105,8 +105,8 @@ public final class Numbers { */ public static byte[] shortToBytes(int val) { byte[] arr = new byte[2]; - arr[2] = (byte) (val >>> 8); - arr[3] = (byte) (val); + arr[0] = (byte) (val >>> 8); + arr[1] = (byte) (val); return arr; } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/common/xcontent/support/XContentMapValues.java b/modules/elasticsearch/src/main/java/org/elasticsearch/common/xcontent/support/XContentMapValues.java index 649bdcc0145..24877b2fc7a 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/common/xcontent/support/XContentMapValues.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/common/xcontent/support/XContentMapValues.java @@ -80,6 +80,13 @@ public class XContentMapValues { return Short.parseShort(node.toString()); } + public static byte nodeByteValue(Object node) { + if (node instanceof Number) { + return ((Number) node).byteValue(); + } + return Byte.parseByte(node.toString()); + } + public static long nodeLongValue(Object node) { if (node instanceof Number) { return ((Number) node).longValue(); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/FieldDataType.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/FieldDataType.java index e4e4248454b..763dc9f2121 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/FieldDataType.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/FieldDataType.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.field.data; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldComparatorSource; import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.bytes.ByteFieldDataType; import org.elasticsearch.index.field.data.doubles.DoubleFieldDataType; import org.elasticsearch.index.field.data.floats.FloatFieldDataType; import org.elasticsearch.index.field.data.ints.IntFieldDataType; @@ -38,6 +39,7 @@ public interface FieldDataType { public static final class DefaultTypes { public static final StringFieldDataType STRING = new StringFieldDataType(); + public static final ByteFieldDataType BYTE = new ByteFieldDataType(); public static final ShortFieldDataType SHORT = new ShortFieldDataType(); public static final IntFieldDataType INT = new IntFieldDataType(); public static final LongFieldDataType LONG = new LongFieldDataType(); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteDocFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteDocFieldData.java new file mode 100644 index 00000000000..a6fa888167f --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteDocFieldData.java @@ -0,0 +1,40 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.field.data.bytes; + +import org.elasticsearch.index.field.data.NumericDocFieldData; + +/** + * @author kimchy (shay.banon) + */ +public class ByteDocFieldData extends NumericDocFieldData { + + public ByteDocFieldData(ByteFieldData fieldData) { + super(fieldData); + } + + public byte getValue() { + return fieldData.value(docId); + } + + public byte[] getValues() { + return fieldData.values(docId); + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java new file mode 100644 index 00000000000..d53cd929231 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java @@ -0,0 +1,142 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.field.data.bytes; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.FieldCache; +import org.elasticsearch.common.RamUsage; +import org.elasticsearch.common.trove.TByteArrayList; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.NumericFieldData; +import org.elasticsearch.index.field.data.support.FieldDataLoader; + +import java.io.IOException; + +/** + * @author kimchy (shay.banon) + */ +public abstract class ByteFieldData extends NumericFieldData { + + static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; + + protected final byte[] values; + + protected ByteFieldData(String fieldName, byte[] values) { + super(fieldName); + this.values = values; + } + + @Override protected long computeSizeInBytes() { + return 1 * values.length + RamUsage.NUM_BYTES_ARRAY_HEADER; + } + + abstract public byte value(int docId); + + abstract public byte[] values(int docId); + + @Override public ByteDocFieldData docFieldData(int docId) { + return super.docFieldData(docId); + } + + @Override protected ByteDocFieldData createFieldData() { + return new ByteDocFieldData(this); + } + + @Override public void forEachValue(StringValueProc proc) { + for (int i = 1; i < values.length; i++) { + proc.onValue(Byte.toString(values[i])); + } + } + + @Override public String stringValue(int docId) { + return Byte.toString(value(docId)); + } + + @Override public byte byteValue(int docId) { + return value(docId); + } + + @Override public short shortValue(int docId) { + return value(docId); + } + + @Override public int intValue(int docId) { + return (int) value(docId); + } + + @Override public long longValue(int docId) { + return (long) value(docId); + } + + @Override public float floatValue(int docId) { + return (float) value(docId); + } + + @Override public double doubleValue(int docId) { + return (double) value(docId); + } + + @Override public FieldDataType type() { + return FieldDataType.DefaultTypes.BYTE; + } + + public void forEachValue(ValueProc proc) { + for (int i = 1; i < values.length; i++) { + proc.onValue(values[i]); + } + } + + public static interface ValueProc { + void onValue(byte value); + } + + public abstract void forEachValueInDoc(int docId, ValueInDocProc proc); + + public static interface ValueInDocProc { + void onValue(int docId, byte value); + } + + public static ByteFieldData load(IndexReader reader, String field) throws IOException { + return FieldDataLoader.load(reader, field, new ByteTypeLoader()); + } + + static class ByteTypeLoader extends FieldDataLoader.FreqsTypeLoader { + + private final TByteArrayList terms = new TByteArrayList(); + + ByteTypeLoader() { + super(); + // the first one indicates null value + terms.add((byte) 0); + } + + @Override public void collectTerm(String term) { + terms.add((byte) FieldCache.NUMERIC_UTILS_INT_PARSER.parseInt(term)); + } + + @Override public ByteFieldData buildSingleValue(String field, int[] ordinals) { + return new SingleValueByteFieldData(field, ordinals, terms.toNativeArray()); + } + + @Override public ByteFieldData buildMultiValue(String field, int[][] ordinals) { + return new MultiValueByteFieldData(field, ordinals, terms.toNativeArray()); + } + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataComparator.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataComparator.java new file mode 100644 index 00000000000..51e7d60e585 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataComparator.java @@ -0,0 +1,63 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.field.data.bytes; + +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; + +/** + * @author kimchy (shay.banon) + */ +// LUCENE MONITOR: Monitor against FieldComparator.Short +public class ByteFieldDataComparator extends NumericFieldDataComparator { + + private final byte[] values; + private short bottom; + + public ByteFieldDataComparator(int numHits, String fieldName, FieldDataCache fieldDataCache) { + super(fieldName, fieldDataCache); + values = new byte[numHits]; + } + + @Override public FieldDataType fieldDataType() { + return FieldDataType.DefaultTypes.BYTE; + } + + @Override public int compare(int slot1, int slot2) { + return values[slot1] - values[slot2]; + } + + @Override public int compareBottom(int doc) { + return bottom - currentFieldData.shortValue(doc); + } + + @Override public void copy(int slot, int doc) { + values[slot] = currentFieldData.byteValue(doc); + } + + @Override public void setBottom(final int bottom) { + this.bottom = values[bottom]; + } + + @Override public Comparable value(int slot) { + return Byte.valueOf(values[slot]); + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataType.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataType.java new file mode 100644 index 00000000000..5918e024a5f --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataType.java @@ -0,0 +1,47 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.field.data.bytes; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.FieldComparatorSource; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.shorts.ShortFieldDataComparator; + +import java.io.IOException; + +/** + * @author kimchy (shay.banon) + */ +public class ByteFieldDataType implements FieldDataType { + + @Override public FieldComparatorSource newFieldComparatorSource(final FieldDataCache cache) { + return new FieldComparatorSource() { + @Override public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException { + return new ShortFieldDataComparator(numHits, fieldname, cache); + } + }; + } + + @Override public ByteFieldData load(IndexReader reader, String fieldName) throws IOException { + return ByteFieldData.load(reader, fieldName); + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java new file mode 100644 index 00000000000..4e0f42ff75e --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java @@ -0,0 +1,171 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.field.data.bytes; + +import org.elasticsearch.common.RamUsage; +import org.elasticsearch.common.thread.ThreadLocals; +import org.elasticsearch.index.field.data.doubles.DoubleFieldData; + +/** + * @author kimchy (shay.banon) + */ +public class MultiValueByteFieldData extends ByteFieldData { + + private static final int VALUE_CACHE_SIZE = 10; + + private ThreadLocal> doublesValuesCache = new ThreadLocal>() { + @Override protected ThreadLocals.CleanableValue initialValue() { + double[][] value = new double[VALUE_CACHE_SIZE][]; + for (int i = 0; i < value.length; i++) { + value[i] = new double[i]; + } + return new ThreadLocals.CleanableValue(value); + } + }; + + private ThreadLocal> valuesCache = new ThreadLocal>() { + @Override protected ThreadLocals.CleanableValue initialValue() { + byte[][] value = new byte[VALUE_CACHE_SIZE][]; + for (int i = 0; i < value.length; i++) { + value[i] = new byte[i]; + } + return new ThreadLocals.CleanableValue(value); + } + }; + + // order with value 0 indicates no value + private final int[][] ordinals; + + public MultiValueByteFieldData(String fieldName, int[][] ordinals, byte[] values) { + super(fieldName, values); + this.ordinals = ordinals; + } + + @Override protected long computeSizeInBytes() { + long size = super.computeSizeInBytes(); + size += RamUsage.NUM_BYTES_ARRAY_HEADER; // for the top level array + for (int[] ordinal : ordinals) { + size += RamUsage.NUM_BYTES_INT * ordinal.length + RamUsage.NUM_BYTES_ARRAY_HEADER; + } + return size; + } + + @Override public boolean multiValued() { + return true; + } + + @Override public boolean hasValue(int docId) { + for (int[] ordinal : ordinals) { + if (ordinal[docId] != 0) { + return true; + } + } + return false; + } + + @Override public void forEachValueInDoc(int docId, StringValueInDocProc proc) { + for (int[] ordinal : ordinals) { + int loc = ordinal[docId]; + if (loc != 0) { + proc.onValue(docId, Byte.toString(values[loc])); + } + } + } + + @Override public void forEachValueInDoc(int docId, DoubleValueInDocProc proc) { + for (int[] ordinal : ordinals) { + int loc = ordinal[docId]; + if (loc != 0) { + proc.onValue(docId, values[loc]); + } + } + } + + @Override public void forEachValueInDoc(int docId, ValueInDocProc proc) { + for (int[] ordinal : ordinals) { + int loc = ordinal[docId]; + if (loc != 0) { + proc.onValue(docId, values[loc]); + } + } + } + + @Override public double[] doubleValues(int docId) { + int length = 0; + for (int[] ordinal : ordinals) { + if (ordinal[docId] != 0) { + length++; + } + } + if (length == 0) { + return DoubleFieldData.EMPTY_DOUBLE_ARRAY; + } + double[] doubles; + if (length < VALUE_CACHE_SIZE) { + doubles = doublesValuesCache.get().get()[length]; + } else { + doubles = new double[length]; + } + int i = 0; + for (int[] ordinal : ordinals) { + int loc = ordinal[docId]; + if (loc != 0) { + doubles[i++] = values[loc]; + } + } + return doubles; + } + + @Override public byte value(int docId) { + for (int[] ordinal : ordinals) { + int loc = ordinal[docId]; + if (loc != 0) { + return values[loc]; + } + } + return 0; + } + + @Override public byte[] values(int docId) { + int length = 0; + for (int[] ordinal : ordinals) { + if (ordinal[docId] != 0) { + length++; + } + } + if (length == 0) { + return EMPTY_BYTE_ARRAY; + } + byte[] bytes; + if (length < VALUE_CACHE_SIZE) { + bytes = valuesCache.get().get()[length]; + } else { + bytes = new byte[length]; + } + int i = 0; + for (int[] ordinal : ordinals) { + int loc = ordinal[docId]; + if (loc != 0) { + bytes[i++] = values[loc]; + } + } + return bytes; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java new file mode 100644 index 00000000000..4d4c6c6a093 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java @@ -0,0 +1,111 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.field.data.bytes; + +import org.elasticsearch.common.RamUsage; +import org.elasticsearch.common.thread.ThreadLocals; +import org.elasticsearch.index.field.data.doubles.DoubleFieldData; + +/** + * @author kimchy (shay.banon) + */ +public class SingleValueByteFieldData extends ByteFieldData { + + private ThreadLocal> doublesValuesCache = new ThreadLocal>() { + @Override protected ThreadLocals.CleanableValue initialValue() { + return new ThreadLocals.CleanableValue(new double[1]); + } + }; + + private ThreadLocal> valuesCache = new ThreadLocal>() { + @Override protected ThreadLocals.CleanableValue initialValue() { + return new ThreadLocals.CleanableValue(new byte[1]); + } + }; + + // order with value 0 indicates no value + private final int[] ordinals; + + public SingleValueByteFieldData(String fieldName, int[] ordinals, byte[] values) { + super(fieldName, values); + this.ordinals = ordinals; + } + + @Override protected long computeSizeInBytes() { + return super.computeSizeInBytes() + + RamUsage.NUM_BYTES_INT * ordinals.length + RamUsage.NUM_BYTES_ARRAY_HEADER; + } + + @Override public boolean multiValued() { + return false; + } + + @Override public boolean hasValue(int docId) { + return ordinals[docId] != 0; + } + + @Override public void forEachValueInDoc(int docId, StringValueInDocProc proc) { + int loc = ordinals[docId]; + if (loc == 0) { + return; + } + proc.onValue(docId, Byte.toString(values[loc])); + } + + @Override public void forEachValueInDoc(int docId, DoubleValueInDocProc proc) { + int loc = ordinals[docId]; + if (loc == 0) { + return; + } + proc.onValue(docId, values[loc]); + } + + @Override public void forEachValueInDoc(int docId, ValueInDocProc proc) { + int loc = ordinals[docId]; + if (loc == 0) { + return; + } + proc.onValue(docId, values[loc]); + } + + @Override public byte value(int docId) { + return values[ordinals[docId]]; + } + + @Override public double[] doubleValues(int docId) { + int loc = ordinals[docId]; + if (loc == 0) { + return DoubleFieldData.EMPTY_DOUBLE_ARRAY; + } + double[] ret = doublesValuesCache.get().get(); + ret[0] = values[loc]; + return ret; + } + + @Override public byte[] values(int docId) { + int loc = ordinals[docId]; + if (loc == 0) { + return EMPTY_BYTE_ARRAY; + } + byte[] ret = valuesCache.get().get(); + ret[0] = values[loc]; + return ret; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/ByteFieldMapper.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/ByteFieldMapper.java new file mode 100644 index 00000000000..1bde6b18de8 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/ByteFieldMapper.java @@ -0,0 +1,251 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.mapper.xcontent; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.NumericRangeFilter; +import org.apache.lucene.search.NumericRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.NumericUtils; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.analysis.NumericIntegerAnalyzer; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MergeMappingException; +import org.elasticsearch.index.search.NumericRangeFieldDataFilter; + +import java.io.IOException; +import java.util.Map; + +import static org.elasticsearch.common.xcontent.support.XContentMapValues.*; +import static org.elasticsearch.index.mapper.xcontent.XContentMapperBuilders.*; +import static org.elasticsearch.index.mapper.xcontent.XContentTypeParsers.*; + +/** + * @author kimchy (shay.banon) + */ +public class ByteFieldMapper extends NumberFieldMapper { + + public static final String CONTENT_TYPE = "byte"; + + public static class Defaults extends NumberFieldMapper.Defaults { + public static final Byte NULL_VALUE = null; + } + + public static class Builder extends NumberFieldMapper.Builder { + + protected Byte nullValue = Defaults.NULL_VALUE; + + public Builder(String name) { + super(name); + builder = this; + } + + public Builder nullValue(byte nullValue) { + this.nullValue = nullValue; + return this; + } + + @Override public ByteFieldMapper build(BuilderContext context) { + ByteFieldMapper fieldMapper = new ByteFieldMapper(buildNames(context), + precisionStep, index, store, boost, omitNorms, omitTermFreqAndPositions, nullValue); + fieldMapper.includeInAll(includeInAll); + return fieldMapper; + } + } + + public static class TypeParser implements XContentMapper.TypeParser { + @Override public XContentMapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { + ByteFieldMapper.Builder builder = byteField(name); + parseNumberField(builder, name, node, parserContext); + for (Map.Entry entry : node.entrySet()) { + String propName = Strings.toUnderscoreCase(entry.getKey()); + Object propNode = entry.getValue(); + if (propName.equals("null_value")) { + builder.nullValue(nodeByteValue(propNode)); + } + } + return builder; + } + } + + private Byte nullValue; + + private String nullValueAsString; + + protected ByteFieldMapper(Names names, int precisionStep, Field.Index index, Field.Store store, + float boost, boolean omitNorms, boolean omitTermFreqAndPositions, + Byte nullValue) { + super(names, precisionStep, index, store, boost, omitNorms, omitTermFreqAndPositions, + new NamedAnalyzer("_byte/" + precisionStep, new NumericIntegerAnalyzer(precisionStep)), + new NamedAnalyzer("_byte/max", new NumericIntegerAnalyzer(Integer.MAX_VALUE))); + this.nullValue = nullValue; + this.nullValueAsString = nullValue == null ? null : nullValue.toString(); + } + + @Override protected int maxPrecisionStep() { + return 32; + } + + @Override public Byte value(Fieldable field) { + byte[] value = field.getBinaryValue(); + if (value == null) { + return null; + } + return value[0]; + } + + @Override public Byte valueFromString(String value) { + return Byte.valueOf(value); + } + + @Override public String indexedValue(String value) { + return NumericUtils.intToPrefixCoded(Byte.parseByte(value)); + } + + @Override public Query rangeQuery(String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { + return NumericRangeQuery.newIntRange(names.indexName(), precisionStep, + lowerTerm == null ? null : Integer.parseInt(lowerTerm), + upperTerm == null ? null : Integer.parseInt(upperTerm), + includeLower, includeUpper); + } + + @Override public Filter rangeFilter(String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { + return NumericRangeFilter.newIntRange(names.indexName(), precisionStep, + lowerTerm == null ? null : Integer.parseInt(lowerTerm), + upperTerm == null ? null : Integer.parseInt(upperTerm), + includeLower, includeUpper); + } + + @Override public Filter rangeFilter(FieldDataCache fieldDataCache, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { + return NumericRangeFieldDataFilter.newByteRange(fieldDataCache, names.indexName(), + lowerTerm == null ? null : Byte.parseByte(lowerTerm), + upperTerm == null ? null : Byte.parseByte(upperTerm), + includeLower, includeUpper); + } + + @Override protected Fieldable parseCreateField(ParseContext context) throws IOException { + byte value; + if (context.externalValueSet()) { + Object externalValue = context.externalValue(); + if (externalValue == null) { + if (nullValue == null) { + return null; + } + value = nullValue; + } else { + value = ((Number) externalValue).byteValue(); + } + if (includeInAll == null || includeInAll) { + context.allEntries().addText(names.fullName(), Byte.toString(value), boost); + } + } else { + if (context.parser().currentToken() == XContentParser.Token.VALUE_NULL) { + if (nullValue == null) { + return null; + } + value = nullValue; + if (nullValueAsString != null && (includeInAll == null || includeInAll)) { + context.allEntries().addText(names.fullName(), nullValueAsString, boost); + } + } else { + value = (byte) context.parser().shortValue(); + if (includeInAll == null || includeInAll) { + context.allEntries().addText(names.fullName(), context.parser().text(), boost); + } + } + } + return new CustomByteNumericField(this, value); + } + + @Override public FieldDataType fieldDataType() { + return FieldDataType.DefaultTypes.BYTE; + } + + @Override protected String contentType() { + return CONTENT_TYPE; + } + + @Override public void merge(XContentMapper mergeWith, MergeContext mergeContext) throws MergeMappingException { + super.merge(mergeWith, mergeContext); + if (!this.getClass().equals(mergeWith.getClass())) { + return; + } + if (!mergeContext.mergeFlags().simulate()) { + this.nullValue = ((ByteFieldMapper) mergeWith).nullValue; + this.nullValueAsString = ((ByteFieldMapper) mergeWith).nullValueAsString; + } + } + + @Override protected void doXContentBody(XContentBuilder builder) throws IOException { + super.doXContentBody(builder); + if (index != Defaults.INDEX) { + builder.field("index", index.name().toLowerCase()); + } + if (store != Defaults.STORE) { + builder.field("store", store.name().toLowerCase()); + } + if (termVector != Defaults.TERM_VECTOR) { + builder.field("term_vector", termVector.name().toLowerCase()); + } + if (omitNorms != Defaults.OMIT_NORMS) { + builder.field("omit_norms", omitNorms); + } + if (omitTermFreqAndPositions != Defaults.OMIT_TERM_FREQ_AND_POSITIONS) { + builder.field("omit_term_freq_and_positions", omitTermFreqAndPositions); + } + if (precisionStep != Defaults.PRECISION_STEP) { + builder.field("precision_step", precisionStep); + } + if (nullValue != null) { + builder.field("null_value", nullValue); + } + if (includeInAll != null) { + builder.field("include_in_all", includeInAll); + } + } + + public static class CustomByteNumericField extends CustomNumericField { + + private final byte number; + + private final NumberFieldMapper mapper; + + public CustomByteNumericField(NumberFieldMapper mapper, byte number) { + super(mapper, mapper.stored() ? new byte[]{number} : null); + this.mapper = mapper; + this.number = number; + } + + @Override public TokenStream tokenStreamValue() { + if (isIndexed) { + return mapper.popCachedStream().setIntValue(number); + } + return null; + } + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/XContentDocumentMapperParser.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/XContentDocumentMapperParser.java index a4fc65d881c..eb6a844bafd 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/XContentDocumentMapperParser.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/XContentDocumentMapperParser.java @@ -69,6 +69,7 @@ public class XContentDocumentMapperParser extends AbstractIndexComponent impleme super(index, indexSettings); this.analysisService = analysisService; typeParsers = new MapBuilder() + .put(ByteFieldMapper.CONTENT_TYPE, new ByteFieldMapper.TypeParser()) .put(ShortFieldMapper.CONTENT_TYPE, new ShortFieldMapper.TypeParser()) .put(IntegerFieldMapper.CONTENT_TYPE, new IntegerFieldMapper.TypeParser()) .put(LongFieldMapper.CONTENT_TYPE, new LongFieldMapper.TypeParser()) diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/XContentMapperBuilders.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/XContentMapperBuilders.java index 6442e64af40..75447a2f3c0 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/XContentMapperBuilders.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/mapper/xcontent/XContentMapperBuilders.java @@ -106,6 +106,10 @@ public final class XContentMapperBuilders { return new ShortFieldMapper.Builder(name); } + public static ByteFieldMapper.Builder byteField(String name) { + return new ByteFieldMapper.Builder(name); + } + public static IntegerFieldMapper.Builder integerField(String name) { return new IntegerFieldMapper.Builder(name); } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java index c1bb89e4791..98294f8b9e0 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.lucene.docset.DocSet; import org.elasticsearch.common.lucene.docset.GetDocSet; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.bytes.ByteFieldData; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; import org.elasticsearch.index.field.data.floats.FloatFieldData; import org.elasticsearch.index.field.data.ints.IntFieldData; @@ -117,6 +118,63 @@ public abstract class NumericRangeFieldDataFilter extends Filter { return h; } + public static NumericRangeFieldDataFilter newByteRange(FieldDataCache fieldDataCache, String field, Byte lowerVal, Byte upperVal, boolean includeLower, boolean includeUpper) { + return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { + @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + final byte inclusiveLowerPoint, inclusiveUpperPoint; + if (lowerVal != null) { + byte i = lowerVal.byteValue(); + if (!includeLower && i == Byte.MAX_VALUE) + return DocSet.EMPTY_DOC_SET; + inclusiveLowerPoint = (byte) (includeLower ? i : (i + 1)); + } else { + inclusiveLowerPoint = Byte.MIN_VALUE; + } + if (upperVal != null) { + byte i = upperVal.byteValue(); + if (!includeUpper && i == Byte.MIN_VALUE) + return DocSet.EMPTY_DOC_SET; + inclusiveUpperPoint = (byte) (includeUpper ? i : (i - 1)); + } else { + inclusiveUpperPoint = Byte.MAX_VALUE; + } + + if (inclusiveLowerPoint > inclusiveUpperPoint) + return DocSet.EMPTY_DOC_SET; + + final ByteFieldData fieldData = (ByteFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.BYTE, reader, field); + return new GetDocSet(reader.maxDoc()) { + + @Override public boolean isCacheable() { + // not cacheable for several reasons: + // 1. It is only relevant when _cache is set to true, and then, we really want to create in mem bitset + // 2. Its already fast without in mem bitset, since it works with field data + return false; + } + + @Override public boolean get(int doc) throws IOException { + if (!fieldData.hasValue(doc)) { + return false; + } + if (fieldData.multiValued()) { + byte[] values = fieldData.values(doc); + for (byte value : values) { + if (value >= inclusiveLowerPoint && value <= inclusiveUpperPoint) { + return true; + } + } + return false; + } else { + byte value = fieldData.value(doc); + return value >= inclusiveLowerPoint && value <= inclusiveUpperPoint; + } + } + }; + } + }; + } + + public static NumericRangeFieldDataFilter newShortRange(FieldDataCache fieldDataCache, String field, Short lowerVal, Short upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/InternalTermsFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/InternalTermsFacet.java index bc812b3662b..f5c6202c46c 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/InternalTermsFacet.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/InternalTermsFacet.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.facet.terms; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.InternalFacet; +import org.elasticsearch.search.facet.terms.bytes.InternalByteTermsFacet; import org.elasticsearch.search.facet.terms.doubles.InternalDoubleTermsFacet; import org.elasticsearch.search.facet.terms.floats.InternalFloatTermsFacet; import org.elasticsearch.search.facet.terms.ints.InternalIntTermsFacet; @@ -42,6 +43,7 @@ public abstract class InternalTermsFacet implements TermsFacet, InternalFacet { InternalIntTermsFacet.registerStream(); InternalFloatTermsFacet.registerStream(); InternalShortTermsFacet.registerStream(); + InternalByteTermsFacet.registerStream(); } public abstract Facet reduce(String name, List facets); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java index bae3fba9c6c..b9b9b2c52a8 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java @@ -31,6 +31,7 @@ import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.FacetCollector; import org.elasticsearch.search.facet.FacetProcessor; +import org.elasticsearch.search.facet.terms.bytes.TermsByteFacetCollector; import org.elasticsearch.search.facet.terms.doubles.TermsDoubleFacetCollector; import org.elasticsearch.search.facet.terms.floats.TermsFloatFacetCollector; import org.elasticsearch.search.facet.terms.index.IndexNameFacetCollector; @@ -145,6 +146,8 @@ public class TermsFacetProcessor extends AbstractComponent implements FacetProce return new TermsFloatFacetCollector(facetName, field, size, comparatorType, context, scriptLang, script, params); } else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.SHORT) { return new TermsShortFacetCollector(facetName, field, size, comparatorType, context, scriptLang, script, params); + } else if (fieldMapper.fieldDataType() == FieldDataType.DefaultTypes.BYTE) { + return new TermsByteFacetCollector(facetName, field, size, comparatorType, context, scriptLang, script, params); } } return new TermsStringFacetCollector(facetName, field, size, comparatorType, context, excluded, pattern, scriptLang, script, params); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java new file mode 100644 index 00000000000..b6b3fa8bdbd --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java @@ -0,0 +1,265 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.terms.bytes; + +import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.collect.ImmutableList; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.thread.ThreadLocals; +import org.elasticsearch.common.trove.TByteIntHashMap; +import org.elasticsearch.common.trove.TByteIntIterator; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.terms.InternalTermsFacet; +import org.elasticsearch.search.facet.terms.TermsFacet; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +/** + * @author kimchy (shay.banon) + */ +public class InternalByteTermsFacet extends InternalTermsFacet { + + private static final String STREAM_TYPE = "bTerms"; + + public static void registerStream() { + Streams.registerStream(STREAM, STREAM_TYPE); + } + + static Stream STREAM = new Stream() { + @Override public Facet readFacet(String type, StreamInput in) throws IOException { + return readTermsFacet(in); + } + }; + + @Override public String streamType() { + return STREAM_TYPE; + } + + public static class ByteEntry implements Entry { + + byte term; + int count; + + public ByteEntry(byte term, int count) { + this.term = term; + this.count = count; + } + + public String term() { + return Short.toString(term); + } + + public String getTerm() { + return term(); + } + + @Override public Number termAsNumber() { + return term; + } + + @Override public Number getTermAsNumber() { + return termAsNumber(); + } + + public int count() { + return count; + } + + public int getCount() { + return count(); + } + + @Override public int compareTo(Entry o) { + byte anotherVal = ((ByteEntry) o).term; + int i = term - anotherVal; + if (i == 0) { + i = count - o.count(); + if (i == 0) { + i = System.identityHashCode(this) - System.identityHashCode(o); + } + } + return i; + } + } + + private String name; + + private String fieldName; + + int requiredSize; + + Collection entries = ImmutableList.of(); + + private ComparatorType comparatorType; + + InternalByteTermsFacet() { + } + + public InternalByteTermsFacet(String name, String fieldName, ComparatorType comparatorType, int requiredSize, Collection entries) { + this.name = name; + this.fieldName = fieldName; + this.comparatorType = comparatorType; + this.requiredSize = requiredSize; + this.entries = entries; + } + + @Override public String name() { + return this.name; + } + + @Override public String getName() { + return this.name; + } + + @Override public String fieldName() { + return this.fieldName; + } + + @Override public String getFieldName() { + return fieldName(); + } + + @Override public String type() { + return TYPE; + } + + @Override public String getType() { + return type(); + } + + @Override public ComparatorType comparatorType() { + return comparatorType; + } + + @Override public ComparatorType getComparatorType() { + return comparatorType(); + } + + @Override public List entries() { + if (!(entries instanceof List)) { + entries = ImmutableList.copyOf(entries); + } + return (List) entries; + } + + @Override public List getEntries() { + return entries(); + } + + @SuppressWarnings({"unchecked"}) @Override public Iterator iterator() { + return (Iterator) entries.iterator(); + } + + + private static ThreadLocal> aggregateCache = new ThreadLocal>() { + @Override protected ThreadLocals.CleanableValue initialValue() { + return new ThreadLocals.CleanableValue(new TByteIntHashMap()); + } + }; + + + @Override public Facet reduce(String name, List facets) { + if (facets.size() == 1) { + return facets.get(0); + } + InternalByteTermsFacet first = (InternalByteTermsFacet) facets.get(0); + TByteIntHashMap aggregated = aggregateCache.get().get(); + aggregated.clear(); + + for (Facet facet : facets) { + InternalByteTermsFacet mFacet = (InternalByteTermsFacet) facet; + for (ByteEntry entry : mFacet.entries) { + aggregated.adjustOrPutValue(entry.term, entry.count(), entry.count()); + } + } + + BoundedTreeSet ordered = new BoundedTreeSet(first.comparatorType().comparator(), first.requiredSize); + for (TByteIntIterator it = aggregated.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new ByteEntry(it.key(), it.value())); + } + first.entries = ordered; + return first; + } + + static final class Fields { + static final XContentBuilderString _TYPE = new XContentBuilderString("_type"); + static final XContentBuilderString _FIELD = new XContentBuilderString("_field"); + static final XContentBuilderString TERMS = new XContentBuilderString("terms"); + static final XContentBuilderString TERM = new XContentBuilderString("term"); + static final XContentBuilderString COUNT = new XContentBuilderString("count"); + } + + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(name); + builder.field(Fields._TYPE, TermsFacet.TYPE); + builder.field(Fields._FIELD, fieldName); + builder.startArray(Fields.TERMS); + for (ByteEntry entry : entries) { + builder.startObject(); + builder.field(Fields.TERM, entry.term); + builder.field(Fields.COUNT, entry.count()); + builder.endObject(); + } + builder.endArray(); + builder.endObject(); + return builder; + } + + public static InternalByteTermsFacet readTermsFacet(StreamInput in) throws IOException { + InternalByteTermsFacet facet = new InternalByteTermsFacet(); + facet.readFrom(in); + return facet; + } + + @Override public void readFrom(StreamInput in) throws IOException { + name = in.readUTF(); + fieldName = in.readUTF(); + comparatorType = ComparatorType.fromId(in.readByte()); + requiredSize = in.readVInt(); + + int size = in.readVInt(); + entries = new ArrayList(size); + for (int i = 0; i < size; i++) { + entries.add(new ByteEntry(in.readByte(), in.readVInt())); + } + } + + @Override public void writeTo(StreamOutput out) throws IOException { + out.writeUTF(name); + out.writeUTF(fieldName); + out.writeByte(comparatorType.id()); + + out.writeVInt(requiredSize); + + out.writeVInt(entries.size()); + for (ByteEntry entry : entries) { + out.writeByte(entry.term); + out.writeVInt(entry.count()); + } + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java new file mode 100644 index 00000000000..a9fb9f1ddcf --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java @@ -0,0 +1,213 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.facet.terms.bytes; + +import org.apache.lucene.index.IndexReader; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.collect.ImmutableList; +import org.elasticsearch.common.collect.Maps; +import org.elasticsearch.common.thread.ThreadLocals; +import org.elasticsearch.common.trove.TByteIntHashMap; +import org.elasticsearch.common.trove.TByteIntIterator; +import org.elasticsearch.index.cache.field.data.FieldDataCache; +import org.elasticsearch.index.field.data.FieldDataType; +import org.elasticsearch.index.field.data.bytes.ByteFieldData; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.script.search.SearchScript; +import org.elasticsearch.search.facet.AbstractFacetCollector; +import org.elasticsearch.search.facet.Facet; +import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Map; + +/** + * @author kimchy (shay.banon) + */ +public class TermsByteFacetCollector extends AbstractFacetCollector { + + static ThreadLocal>> cache = new ThreadLocal>>() { + @Override protected ThreadLocals.CleanableValue> initialValue() { + return new ThreadLocals.CleanableValue>(new ArrayDeque()); + } + }; + + private final FieldDataCache fieldDataCache; + + private final String fieldName; + + private final String indexFieldName; + + private final TermsFacet.ComparatorType comparatorType; + + private final int size; + + private final int numberOfShards; + + private final FieldDataType fieldDataType; + + private ByteFieldData fieldData; + + private final StaticAggregatorValueProc aggregator; + + private final SearchScript script; + + public TermsByteFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, SearchContext context, + String scriptLang, String script, Map params) { + super(facetName); + this.fieldDataCache = context.fieldDataCache(); + this.size = size; + this.comparatorType = comparatorType; + this.numberOfShards = context.numberOfShards(); + + this.fieldName = fieldName; + + MapperService.SmartNameFieldMappers smartMappers = context.mapperService().smartName(fieldName); + if (smartMappers == null || !smartMappers.hasMapper()) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] doesn't have a type, can't run terms short facet collector on it"); + } else { + // add type filter if there is exact doc mapper associated with it + if (smartMappers.hasDocMapper()) { + setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); + } + + if (smartMappers.mapper().fieldDataType() != FieldDataType.DefaultTypes.BYTE) { + throw new ElasticSearchIllegalArgumentException("Field [" + fieldName + "] is not of byte type, can't run terms short facet collector on it"); + } + + this.indexFieldName = smartMappers.mapper().names().indexName(); + this.fieldDataType = smartMappers.mapper().fieldDataType(); + } + + if (script != null) { + this.script = new SearchScript(context.lookup(), scriptLang, script, params, context.scriptService()); + } else { + this.script = null; + } + + if (this.script == null) { + aggregator = new StaticAggregatorValueProc(popFacets()); + } else { + aggregator = new AggregatorValueProc(popFacets(), this.script); + } + } + + @Override protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + if (script != null) { + script.setNextReader(reader); + } + } + + @Override protected void doCollect(int doc) throws IOException { + fieldData.forEachValueInDoc(doc, aggregator); + } + + @Override public Facet facet() { + TByteIntHashMap facets = aggregator.facets(); + if (facets.isEmpty()) { + pushFacets(facets); + return new InternalByteTermsFacet(facetName, fieldName, comparatorType, size, ImmutableList.of()); + } else { + // we need to fetch facets of "size * numberOfShards" because of problems in how they are distributed across shards + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size * numberOfShards); + for (TByteIntIterator it = facets.iterator(); it.hasNext();) { + it.advance(); + ordered.add(new InternalByteTermsFacet.ByteEntry(it.key(), it.value())); + } + pushFacets(facets); + return new InternalByteTermsFacet(facetName, fieldName, comparatorType, size, ordered); + } + } + + static TByteIntHashMap popFacets() { + Deque deque = cache.get().get(); + if (deque.isEmpty()) { + deque.add(new TByteIntHashMap()); + } + TByteIntHashMap facets = deque.pollFirst(); + facets.clear(); + return facets; + } + + static void pushFacets(TByteIntHashMap facets) { + facets.clear(); + Deque deque = cache.get().get(); + if (deque != null) { + deque.add(facets); + } + } + + public static class AggregatorValueProc extends StaticAggregatorValueProc { + + private final SearchScript script; + + private final Map scriptParams; + + public AggregatorValueProc(TByteIntHashMap facets, SearchScript script) { + super(facets); + this.script = script; + if (script != null) { + scriptParams = Maps.newHashMapWithExpectedSize(4); + } else { + scriptParams = null; + } + } + + @Override public void onValue(int docId, byte value) { + if (script != null) { + scriptParams.put("term", value); + Object scriptValue = script.execute(docId, scriptParams); + if (scriptValue == null) { + return; + } + if (scriptValue instanceof Boolean) { + if (!((Boolean) scriptValue)) { + return; + } + } else { + value = ((Number) scriptValue).byteValue(); + } + } + super.onValue(docId, value); + } + } + + public static class StaticAggregatorValueProc implements ByteFieldData.ValueInDocProc { + + private final TByteIntHashMap facets; + + public StaticAggregatorValueProc(TByteIntHashMap facets) { + this.facets = facets; + } + + @Override public void onValue(int docId, byte value) { + facets.adjustOrPutValue(value, 1, 1); + } + + public final TByteIntHashMap facets() { + return facets; + } + } +} diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java index 114feee018f..386e950adad 100644 --- a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java +++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java @@ -32,6 +32,11 @@ import org.elasticsearch.search.facet.histogram.HistogramFacet; import org.elasticsearch.search.facet.range.RangeFacet; import org.elasticsearch.search.facet.statistical.StatisticalFacet; import org.elasticsearch.search.facet.terms.TermsFacet; +import org.elasticsearch.search.facet.terms.bytes.InternalByteTermsFacet; +import org.elasticsearch.search.facet.terms.doubles.InternalDoubleTermsFacet; +import org.elasticsearch.search.facet.terms.ints.InternalIntTermsFacet; +import org.elasticsearch.search.facet.terms.longs.InternalLongTermsFacet; +import org.elasticsearch.search.facet.terms.shorts.InternalShortTermsFacet; import org.elasticsearch.test.integration.AbstractNodesTests; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -209,12 +214,25 @@ public class SimpleFacetsTests extends AbstractNodesTests { } catch (Exception e) { // ignore } - client.admin().indices().prepareCreate("test").execute().actionGet(); + client.admin().indices().prepareCreate("test") + .addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties") + .startObject("bstag").field("type", "byte").endObject() + .startObject("shstag").field("type", "short").endObject() + .startObject("istag").field("type", "integer").endObject() + .startObject("lstag").field("type", "long").endObject() + .startObject("fstag").field("type", "float").endObject() + .startObject("dstag").field("type", "double").endObject() + .endObject().endObject().endObject()) + .execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); client.prepareIndex("test", "type1").setSource(jsonBuilder().startObject() .field("stag", "111") + .field("bstag", 111) + .field("shstag", 111) + .field("istag", 111) .field("lstag", 111) + .field("fstag", 111.1f) .field("dstag", 111.1) .startArray("tag").value("xxx").value("yyy").endArray() .startArray("ltag").value(1000l).value(2000l).endArray() @@ -224,7 +242,11 @@ public class SimpleFacetsTests extends AbstractNodesTests { client.prepareIndex("test", "type1").setSource(jsonBuilder().startObject() .field("stag", "111") + .field("bstag", 111) + .field("shstag", 111) + .field("istag", 111) .field("lstag", 111) + .field("fstag", 111.1f) .field("dstag", 111.1) .startArray("tag").value("zzz").value("yyy").endArray() .startArray("ltag").value(3000l).value(2000l).endArray() @@ -260,12 +282,14 @@ public class SimpleFacetsTests extends AbstractNodesTests { .execute().actionGet(); facet = searchResponse.facets().facet("facet1"); + assertThat(facet, instanceOf(InternalLongTermsFacet.class)); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); assertThat(facet.entries().get(0).term(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(2)); facet = searchResponse.facets().facet("facet2"); + assertThat(facet, instanceOf(InternalLongTermsFacet.class)); assertThat(facet.name(), equalTo("facet2")); assertThat(facet.entries().size(), equalTo(3)); assertThat(facet.entries().get(0).term(), equalTo("2000")); @@ -282,12 +306,14 @@ public class SimpleFacetsTests extends AbstractNodesTests { .execute().actionGet(); facet = searchResponse.facets().facet("facet1"); + assertThat(facet, instanceOf(InternalDoubleTermsFacet.class)); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); assertThat(facet.entries().get(0).term(), equalTo("111.1")); assertThat(facet.entries().get(0).count(), equalTo(2)); facet = searchResponse.facets().facet("facet2"); + assertThat(facet, instanceOf(InternalDoubleTermsFacet.class)); assertThat(facet.name(), equalTo("facet2")); assertThat(facet.entries().size(), equalTo(3)); assertThat(facet.entries().get(0).term(), equalTo("2000.1")); @@ -297,6 +323,42 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet.entries().get(2).term(), anyOf(equalTo("1000.1"), equalTo("3000.1"))); assertThat(facet.entries().get(2).count(), equalTo(1)); + searchResponse = client.prepareSearch() + .setQuery(termQuery("stag", "111")) + .addFacet(termsFacet("facet1").field("bstag").size(10)) + .execute().actionGet(); + + facet = searchResponse.facets().facet("facet1"); + assertThat(facet, instanceOf(InternalByteTermsFacet.class)); + assertThat(facet.name(), equalTo("facet1")); + assertThat(facet.entries().size(), equalTo(1)); + assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).count(), equalTo(2)); + + searchResponse = client.prepareSearch() + .setQuery(termQuery("stag", "111")) + .addFacet(termsFacet("facet1").field("istag").size(10)) + .execute().actionGet(); + + facet = searchResponse.facets().facet("facet1"); + assertThat(facet, instanceOf(InternalIntTermsFacet.class)); + assertThat(facet.name(), equalTo("facet1")); + assertThat(facet.entries().size(), equalTo(1)); + assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).count(), equalTo(2)); + + searchResponse = client.prepareSearch() + .setQuery(termQuery("stag", "111")) + .addFacet(termsFacet("facet1").field("shstag").size(10)) + .execute().actionGet(); + + facet = searchResponse.facets().facet("facet1"); + assertThat(facet, instanceOf(InternalShortTermsFacet.class)); + assertThat(facet.name(), equalTo("facet1")); + assertThat(facet.entries().size(), equalTo(1)); + assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).count(), equalTo(2)); + // Test Facet Filter searchResponse = client.prepareSearch()