From 12d9268db256b728e8d334801739fb46167902b6 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 8 Jul 2013 16:12:12 +0200 Subject: [PATCH] Make field data able to support more than 2B ordinals per segment. Although segments are limited to 2B documents, there is no limit on the number of unique values that a segment may store. This commit replaces 'int' with 'long' every time a number is used to represent an ordinal and modifies the data-structures used to store ordinals so that they can actually support more than 2B ordinals per segment. This commit also improves memory usage of the multi-ordinals data-structures and the transient memory usage which is required to build them (OrdinalsBuilder) by using Lucene's PackedInts data-structures. In the end, loading the ordinals mapping from disk may be a little slower, field-data-based features such as faceting may be slightly slower or faster depending on whether being nicer to the CPU caches balances the overhead of the additional abstraction or not, and memory usage should be better in all cases, especially when the size of the ordinals mapping is not negligible compared to the size of the values (numeric data for example). 
Close #3189 --- .../util/packed/XAbstractPagedMutable.java | 171 ++++++++ .../lucene/util/packed/XGrowableWriter.java | 162 +++++++ .../lucene/util/packed/XPackedInts.java | 88 ++++ .../util/packed/XPagedGrowableWriter.java | 79 ++++ .../common/util/AbstractBigArray.java | 68 +++ .../common/util/BigDoubleArrayList.java | 75 ++++ .../common/util/BigFloatArrayList.java | 70 ++++ .../common/util/BigIntArray.java | 69 +++ .../elasticsearch/common/util/IntArray.java | 34 ++ .../elasticsearch/common/util/IntArrays.java | 66 +++ .../index/fielddata/BytesValues.java | 16 +- .../index/fielddata/DoubleValues.java | 8 +- .../index/fielddata/LongValues.java | 6 +- .../BytesRefOrdValComparator.java | 40 +- .../fielddata/ordinals/DocIdOrdinals.java | 22 +- .../fielddata/ordinals/EmptyOrdinals.java | 18 +- .../ordinals/MultiFlatArrayOrdinals.java | 189 --------- .../fielddata/ordinals/MultiOrdinals.java | 219 ++++++++++ .../index/fielddata/ordinals/Ordinals.java | 26 +- .../fielddata/ordinals/OrdinalsBuilder.java | 396 +++++++++++------- .../fielddata/ordinals/PositiveIntPool.java | 146 ------- .../ordinals/SinglePackedOrdinals.java | 44 +- .../ordinals/SparseMultiArrayOrdinals.java | 216 ---------- .../plain/DoubleArrayAtomicFieldData.java | 72 ++-- .../plain/DoubleArrayIndexFieldData.java | 25 +- .../plain/FSTBytesAtomicFieldData.java | 47 +-- .../plain/FSTBytesIndexFieldData.java | 14 +- .../plain/FloatArrayAtomicFieldData.java | 72 ++-- .../plain/FloatArrayIndexFieldData.java | 26 +- .../GeoPointDoubleArrayAtomicFieldData.java | 64 +-- .../GeoPointDoubleArrayIndexFieldData.java | 18 +- .../plain/PackedArrayAtomicFieldData.java | 4 +- .../plain/PackedArrayIndexFieldData.java | 7 +- .../plain/PagedBytesAtomicFieldData.java | 39 +- .../plain/PagedBytesIndexFieldData.java | 35 +- .../TermsStringOrdinalsFacetExecutor.java | 46 +- .../index/fielddata/FilterFieldDataTest.java | 14 +- .../ordinals/FlatMultiOrdinalsTests.java | 35 -- .../ordinals/MultiOrdinalsTests.java | 193 
++++----- .../ordinals/SingleOrdinalsTests.java | 6 +- .../ordinals/SparseMultiOrdinalsTests.java | 164 -------- 41 files changed, 1775 insertions(+), 1334 deletions(-) create mode 100644 src/main/java/org/apache/lucene/util/packed/XAbstractPagedMutable.java create mode 100644 src/main/java/org/apache/lucene/util/packed/XGrowableWriter.java create mode 100644 src/main/java/org/apache/lucene/util/packed/XPackedInts.java create mode 100644 src/main/java/org/apache/lucene/util/packed/XPagedGrowableWriter.java create mode 100644 src/main/java/org/elasticsearch/common/util/AbstractBigArray.java create mode 100644 src/main/java/org/elasticsearch/common/util/BigDoubleArrayList.java create mode 100644 src/main/java/org/elasticsearch/common/util/BigFloatArrayList.java create mode 100644 src/main/java/org/elasticsearch/common/util/BigIntArray.java create mode 100644 src/main/java/org/elasticsearch/common/util/IntArray.java create mode 100644 src/main/java/org/elasticsearch/common/util/IntArrays.java delete mode 100644 src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiFlatArrayOrdinals.java create mode 100644 src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiOrdinals.java delete mode 100644 src/main/java/org/elasticsearch/index/fielddata/ordinals/PositiveIntPool.java delete mode 100644 src/main/java/org/elasticsearch/index/fielddata/ordinals/SparseMultiArrayOrdinals.java delete mode 100644 src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/FlatMultiOrdinalsTests.java delete mode 100644 src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SparseMultiOrdinalsTests.java diff --git a/src/main/java/org/apache/lucene/util/packed/XAbstractPagedMutable.java b/src/main/java/org/apache/lucene/util/packed/XAbstractPagedMutable.java new file mode 100644 index 00000000000..f1b3679a365 --- /dev/null +++ b/src/main/java/org/apache/lucene/util/packed/XAbstractPagedMutable.java @@ -0,0 +1,171 @@ +package org.apache.lucene.util.packed; + 
+/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import static org.apache.lucene.util.packed.XPackedInts.checkBlockSize; +import static org.apache.lucene.util.packed.XPackedInts.numBlocks; + +import org.apache.lucene.util.Version; +import org.elasticsearch.common.lucene.Lucene; + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * Base implementation for {@link XPagedGrowableWriter}. + * @lucene.internal + */ +abstract class XAbstractPagedMutable<T extends XAbstractPagedMutable<T>> { + + static { + // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640. 
+ assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed"; + } + + static final int MIN_BLOCK_SIZE = 1 << 6; + static final int MAX_BLOCK_SIZE = 1 << 30; + + final long size; + final int pageShift; + final int pageMask; + final PackedInts.Mutable[] subMutables; + final int bitsPerValue; + + XAbstractPagedMutable(int bitsPerValue, long size, int pageSize) { + this.bitsPerValue = bitsPerValue; + this.size = size; + pageShift = checkBlockSize(pageSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE); + pageMask = pageSize - 1; + final int numPages = numBlocks(size, pageSize); + subMutables = new PackedInts.Mutable[numPages]; + } + + protected final void fillPages() { + final int numPages = numBlocks(size, pageSize()); + for (int i = 0; i < numPages; ++i) { + // do not allocate for more entries than necessary on the last page + final int valueCount = i == numPages - 1 ? lastPageSize(size) : pageSize(); + subMutables[i] = newMutable(valueCount, bitsPerValue); + } + } + + protected abstract PackedInts.Mutable newMutable(int valueCount, int bitsPerValue); + + final int lastPageSize(long size) { + final int sz = indexInPage(size); + return sz == 0 ? pageSize() : sz; + } + + final int pageSize() { + return pageMask + 1; + } + + /** The number of values. */ + public final long size() { + return size; + } + + final int pageIndex(long index) { + return (int) (index >>> pageShift); + } + + final int indexInPage(long index) { + return (int) index & pageMask; + } + + /** Get value at index. */ + public final long get(long index) { + assert index >= 0 && index < size; + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + return subMutables[pageIndex].get(indexInPage); + } + + /** Set value at index. 
*/ + public final void set(long index, long value) { + assert index >= 0 && index < size; + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + subMutables[pageIndex].set(indexInPage, value); + } + + protected long baseRamBytesUsed() { + return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + + RamUsageEstimator.NUM_BYTES_OBJECT_REF + + RamUsageEstimator.NUM_BYTES_LONG + + 3 * RamUsageEstimator.NUM_BYTES_INT; + } + + /** Return the number of bytes used by this object. */ + public long ramBytesUsed() { + long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed()); + bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length); + for (PackedInts.Mutable gw : subMutables) { + bytesUsed += gw.ramBytesUsed(); + } + return bytesUsed; + } + + protected abstract T newUnfilledCopy(long newSize); + + /** Create a new copy of size newSize based on the content of + * this buffer. This method is much more efficient than creating a new + * instance and copying values one by one. */ + public final T resize(long newSize) { + final T copy = newUnfilledCopy(newSize); + final int numCommonPages = Math.min(copy.subMutables.length, subMutables.length); + final long[] copyBuffer = new long[1024]; + for (int i = 0; i < copy.subMutables.length; ++i) { + final int valueCount = i == copy.subMutables.length - 1 ? lastPageSize(newSize) : pageSize(); + final int bpv = i < numCommonPages ? subMutables[i].getBitsPerValue() : this.bitsPerValue; + copy.subMutables[i] = newMutable(valueCount, bpv); + if (i < numCommonPages) { + final int copyLength = Math.min(valueCount, subMutables[i].size()); + XPackedInts.copy(subMutables[i], 0, copy.subMutables[i], 0, copyLength, copyBuffer); + } + } + return copy; + } + + /** Similar to {@link ArrayUtil#grow(long[], int)}. 
*/ + public final T grow(long minSize) { + assert minSize >= 0; + if (minSize <= size()) { + @SuppressWarnings("unchecked") + final T result = (T) this; + return result; + } + long extra = minSize >>> 3; + if (extra < 3) { + extra = 3; + } + final long newSize = minSize + extra; + return resize(newSize); + } + + /** Similar to {@link ArrayUtil#grow(long[])}. */ + public final T grow() { + return grow(size() + 1); + } + + @Override + public final String toString() { + return getClass().getSimpleName() + "(size=" + size() + ",pageSize=" + pageSize() + ")"; + } + +} diff --git a/src/main/java/org/apache/lucene/util/packed/XGrowableWriter.java b/src/main/java/org/apache/lucene/util/packed/XGrowableWriter.java new file mode 100644 index 00000000000..c36eea376f5 --- /dev/null +++ b/src/main/java/org/apache/lucene/util/packed/XGrowableWriter.java @@ -0,0 +1,162 @@ +package org.apache.lucene.util.packed; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.Version; +import org.elasticsearch.common.lucene.Lucene; + +import java.io.IOException; + +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * Implements {@link XPackedInts.Mutable}, but grows the + * bit count of the underlying packed ints on-demand. + *

<p>Beware that this class will accept to set negative values but in order + * to do this, it will grow the number of bits per value to 64. + * + * <p>@lucene.internal</p>

+ */ +public class XGrowableWriter implements PackedInts.Mutable { + + static { + // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640. + assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed"; + } + + private long currentMask; + private PackedInts.Mutable current; + private final float acceptableOverheadRatio; + + /** + * @param startBitsPerValue the initial number of bits per value, may grow depending on the data + * @param valueCount the number of values + * @param acceptableOverheadRatio an acceptable overhead ratio + */ + public XGrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) { + this.acceptableOverheadRatio = acceptableOverheadRatio; + current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio); + currentMask = mask(current.getBitsPerValue()); + } + + private static long mask(int bitsPerValue) { + return bitsPerValue == 64 ? ~0L : PackedInts.maxValue(bitsPerValue); + } + + @Override + public long get(int index) { + return current.get(index); + } + + @Override + public int size() { + return current.size(); + } + + @Override + public int getBitsPerValue() { + return current.getBitsPerValue(); + } + + public PackedInts.Mutable getMutable() { + return current; + } + + @Override + public Object getArray() { + return current.getArray(); + } + + @Override + public boolean hasArray() { + return current.hasArray(); + } + + private void ensureCapacity(long value) { + if ((value & currentMask) == value) { + return; + } + final int bitsRequired = value < 0 ? 
64 : PackedInts.bitsRequired(value); + assert bitsRequired > current.getBitsPerValue(); + final int valueCount = size(); + PackedInts.Mutable next = PackedInts.getMutable(valueCount, bitsRequired, acceptableOverheadRatio); + PackedInts.copy(current, 0, next, 0, valueCount, PackedInts.DEFAULT_BUFFER_SIZE); + current = next; + currentMask = mask(current.getBitsPerValue()); + } + + @Override + public void set(int index, long value) { + ensureCapacity(value); + current.set(index, value); + } + + @Override + public void clear() { + current.clear(); + } + + public XGrowableWriter resize(int newSize) { + XGrowableWriter next = new XGrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio); + final int limit = Math.min(size(), newSize); + PackedInts.copy(current, 0, next, 0, limit, PackedInts.DEFAULT_BUFFER_SIZE); + return next; + } + + @Override + public int get(int index, long[] arr, int off, int len) { + return current.get(index, arr, off, len); + } + + @Override + public int set(int index, long[] arr, int off, int len) { + long max = 0; + for (int i = off, end = off + len; i < end; ++i) { + // bitwise or is nice because either all values are positive and the + // or-ed result will require as many bits per value as the max of the + // values, or one of them is negative and the result will be negative, + // forcing GrowableWriter to use 64 bits per value + max |= arr[i]; + } + ensureCapacity(max); + return current.set(index, arr, off, len); + } + + @Override + public void fill(int fromIndex, int toIndex, long val) { + ensureCapacity(val); + current.fill(fromIndex, toIndex, val); + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.alignObjectSize( + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + + RamUsageEstimator.NUM_BYTES_OBJECT_REF + + RamUsageEstimator.NUM_BYTES_LONG + + RamUsageEstimator.NUM_BYTES_FLOAT) + + current.ramBytesUsed(); + } + + @Override + public void save(DataOutput out) throws IOException { + current.save(out); + } + +} 
diff --git a/src/main/java/org/apache/lucene/util/packed/XPackedInts.java b/src/main/java/org/apache/lucene/util/packed/XPackedInts.java new file mode 100644 index 00000000000..9a6c733c630 --- /dev/null +++ b/src/main/java/org/apache/lucene/util/packed/XPackedInts.java @@ -0,0 +1,88 @@ +package org.apache.lucene.util.packed; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.Version; +import org.apache.lucene.util.packed.PackedInts.Mutable; +import org.apache.lucene.util.packed.PackedInts.Reader; +import org.elasticsearch.common.lucene.Lucene; + +/** + * Simplistic compression for array of unsigned long values. + * Each value is >= 0 and <= a specified maximum value. The + * values are stored as packed ints, with each value + * consuming a fixed number of bits. + * + * @lucene.internal + */ +public class XPackedInts { + + static { + // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640. + assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed"; + } + + /** Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer. 
*/ + static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) { + assert buf.length > 0; + int remaining = 0; + while (len > 0) { + final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining)); + assert read > 0; + srcPos += read; + len -= read; + remaining += read; + final int written = dest.set(destPos, buf, 0, remaining); + assert written > 0; + destPos += written; + if (written < remaining) { + System.arraycopy(buf, written, buf, 0, remaining - written); + } + remaining -= written; + } + while (remaining > 0) { + final int written = dest.set(destPos, buf, 0, remaining); + destPos += written; + remaining -= written; + System.arraycopy(buf, written, buf, 0, remaining); + } + } + + /** Check that the block size is a power of 2, in the right bounds, and return + * its log in base 2. */ + static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) { + if (blockSize < minBlockSize || blockSize > maxBlockSize) { + throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize); + } + if ((blockSize & (blockSize - 1)) != 0) { + throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize); + } + return Integer.numberOfTrailingZeros(blockSize); + } + + /** Return the number of blocks required to store size values on + * blockSize. */ + static int numBlocks(long size, int blockSize) { + final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 
0 : 1); + if ((long) numBlocks * blockSize < size) { + throw new IllegalArgumentException("size is too large for this block size"); + } + return numBlocks; + } + +} diff --git a/src/main/java/org/apache/lucene/util/packed/XPagedGrowableWriter.java b/src/main/java/org/apache/lucene/util/packed/XPagedGrowableWriter.java new file mode 100644 index 00000000000..e339de17565 --- /dev/null +++ b/src/main/java/org/apache/lucene/util/packed/XPagedGrowableWriter.java @@ -0,0 +1,79 @@ +package org.apache.lucene.util.packed; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.Version; +import org.elasticsearch.common.lucene.Lucene; + +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.PackedInts.Mutable; + +/** + * A {@link XPagedGrowableWriter}. This class slices data into fixed-size blocks + * which have independent numbers of bits per value and grow on-demand. + *

<p>You should use this class instead of {@link AppendingLongBuffer} only when + * you need random write-access. Otherwise this class will likely be slower and + * less memory-efficient. + * @lucene.internal + */ +public final class XPagedGrowableWriter extends XAbstractPagedMutable<XPagedGrowableWriter> { + + static { + // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640. + assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed"; + } + + final float acceptableOverheadRatio; + + /** + * Create a new {@link XPagedGrowableWriter} instance. + * + * @param size the number of values to store. + * @param pageSize the number of values per page + * @param startBitsPerValue the initial number of bits per value + * @param acceptableOverheadRatio an acceptable overhead ratio + */ + public XPagedGrowableWriter(long size, int pageSize, + int startBitsPerValue, float acceptableOverheadRatio) { + this(size, pageSize, startBitsPerValue, acceptableOverheadRatio, true); + } + + XPagedGrowableWriter(long size, int pageSize,int startBitsPerValue, float acceptableOverheadRatio, boolean fillPages) { + super(startBitsPerValue, size, pageSize); + this.acceptableOverheadRatio = acceptableOverheadRatio; + if (fillPages) { + fillPages(); + } + } + + @Override + protected Mutable newMutable(int valueCount, int bitsPerValue) { + return new XGrowableWriter(bitsPerValue, valueCount, acceptableOverheadRatio); + } + + @Override + protected XPagedGrowableWriter newUnfilledCopy(long newSize) { + return new XPagedGrowableWriter(newSize, pageSize(), bitsPerValue, acceptableOverheadRatio, false); + } + + @Override + protected long baseRamBytesUsed() { + return super.baseRamBytesUsed() + RamUsageEstimator.NUM_BYTES_FLOAT; + } + +} diff --git a/src/main/java/org/elasticsearch/common/util/AbstractBigArray.java b/src/main/java/org/elasticsearch/common/util/AbstractBigArray.java new file mode 100644 index 
00000000000..3cb5267e983 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/AbstractBigArray.java @@ -0,0 +1,68 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +import com.google.common.base.Preconditions; + +/** Common implementation for array lists that slice data into fixed-size blocks. 
*/ +abstract class AbstractBigArray { + + private final int pageShift; + private final int pageMask; + protected long size; + + protected AbstractBigArray(int pageSize) { + Preconditions.checkArgument(pageSize >= 128, "pageSize must be >= 128"); + Preconditions.checkArgument((pageSize & (pageSize - 1)) == 0, "pageSize must be a power of two"); + this.pageShift = Integer.numberOfTrailingZeros(pageSize); + this.pageMask = pageSize - 1; + size = 0; + } + + final int numPages(long capacity) { + final long numPages = (capacity + pageMask) >>> pageShift; + Preconditions.checkArgument(numPages <= Integer.MAX_VALUE, "pageSize=" + (pageMask + 1) + " is too small for such as capacity: " + capacity); + return (int) numPages; + } + + final int pageSize() { + return pageMask + 1; + } + + final int pageIndex(long index) { + return (int) (index >>> pageShift); + } + + final int indexInPage(long index) { + return (int) (index & pageMask); + } + + public final long size() { + return size; + } + + protected abstract int numBytesPerElement(); + + public final long sizeInBytes() { + // rough approximate, we only take into account the size of the values, not the overhead of the array objects + return ((long) pageIndex(size - 1) + 1) * pageSize() * numBytesPerElement(); + } + +} diff --git a/src/main/java/org/elasticsearch/common/util/BigDoubleArrayList.java b/src/main/java/org/elasticsearch/common/util/BigDoubleArrayList.java new file mode 100644 index 00000000000..6f213588597 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/BigDoubleArrayList.java @@ -0,0 +1,75 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +import org.apache.lucene.util.ArrayUtil; +import org.elasticsearch.common.RamUsage; + +import java.util.Arrays; + +/** Double array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of + * configurable length. */ +public final class BigDoubleArrayList extends AbstractBigArray { + + /** Default page size, 16KB of memory per page. */ + private static final int DEFAULT_PAGE_SIZE = 1 << 11; + + private double[][] pages; + + public BigDoubleArrayList(int pageSize, long initialCapacity) { + super(pageSize); + pages = new double[numPages(initialCapacity)][]; + } + + public BigDoubleArrayList(long initialCapacity) { + this(DEFAULT_PAGE_SIZE, initialCapacity); + } + + public BigDoubleArrayList() { + this(1024); + } + + public double get(long index) { + assert index >= 0 && index < size; + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + return pages[pageIndex][indexInPage]; + } + + public void add(double d) { + final int pageIndex = pageIndex(size); + if (pageIndex >= pages.length) { + final int newLength = ArrayUtil.oversize(pageIndex + 1, numBytesPerElement()); + pages = Arrays.copyOf(pages, newLength); + } + if (pages[pageIndex] == null) { + pages[pageIndex] = new double[pageSize()]; + } + final int indexInPage = indexInPage(size); + pages[pageIndex][indexInPage] = d; + ++size; + } + + @Override + protected int numBytesPerElement() { + return RamUsage.NUM_BYTES_DOUBLE; + } + +} diff --git 
a/src/main/java/org/elasticsearch/common/util/BigFloatArrayList.java b/src/main/java/org/elasticsearch/common/util/BigFloatArrayList.java new file mode 100644 index 00000000000..15ac6dd0a9c --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/BigFloatArrayList.java @@ -0,0 +1,70 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +import org.apache.lucene.util.ArrayUtil; +import org.elasticsearch.common.RamUsage; + +/** Float array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of + * configurable length. */ +public final class BigFloatArrayList extends AbstractBigArray { + + /** Default page size, 16KB of memory per page. 
*/ + private static final int DEFAULT_PAGE_SIZE = 1 << 12; + + private float[][] pages; + + public BigFloatArrayList(int pageSize, long initialCapacity) { + super(pageSize); + pages = new float[numPages(initialCapacity)][]; + } + + public BigFloatArrayList(long initialCapacity) { + this(DEFAULT_PAGE_SIZE, initialCapacity); + } + + public BigFloatArrayList() { + this(1024); + } + + public float get(long index) { + assert index >= 0 && index < size; + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + return pages[pageIndex][indexInPage]; + } + + public void add(float f) { + final int pageIndex = pageIndex(size); + pages = ArrayUtil.grow(pages, pageIndex + 1); + if (pages[pageIndex] == null) { + pages[pageIndex] = new float[pageSize()]; + } + final int indexInPage = indexInPage(size); + pages[pageIndex][indexInPage] = f; + ++size; + } + + @Override + protected int numBytesPerElement() { + return RamUsage.NUM_BYTES_FLOAT; + } + +} diff --git a/src/main/java/org/elasticsearch/common/util/BigIntArray.java b/src/main/java/org/elasticsearch/common/util/BigIntArray.java new file mode 100644 index 00000000000..669940bd273 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/BigIntArray.java @@ -0,0 +1,69 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +import org.elasticsearch.common.RamUsage; + +/** Int array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of + * configurable length. */ +public final class BigIntArray extends AbstractBigArray implements IntArray { + + /** Default page size, 16KB of memory per page. */ + public static final int DEFAULT_PAGE_SIZE = 1 << 12; + + private int[][] pages; + + public BigIntArray(int pageSize, long size) { + super(pageSize); + this.size = size; + pages = new int[numPages(size)][]; + for (int i = 0; i < pages.length; ++i) { + pages[i] = new int[pageSize()]; + } + } + + public BigIntArray(long size) { + this(DEFAULT_PAGE_SIZE, size); + } + + public int get(long index) { + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + return pages[pageIndex][indexInPage]; + } + + public void set(long index, int value) { + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + pages[pageIndex][indexInPage] = value; + } + + public int increment(long index, int inc) { + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + return pages[pageIndex][indexInPage] += inc; + } + + @Override + protected int numBytesPerElement() { + return RamUsage.NUM_BYTES_INT; + } + +} diff --git a/src/main/java/org/elasticsearch/common/util/IntArray.java b/src/main/java/org/elasticsearch/common/util/IntArray.java new file mode 100644 index 00000000000..234683eb048 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/IntArray.java @@ -0,0 +1,34 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +/** Abstraction of an array of integer values. */ +public interface IntArray { + + /** Get an element given its index. */ + public abstract int get(long index); + + /** Set a value at the given index. */ + public abstract void set(long index, int value); + + /** Increment value at the given index by inc and return the value. */ + public abstract int increment(long index, int inc); + +} diff --git a/src/main/java/org/elasticsearch/common/util/IntArrays.java b/src/main/java/org/elasticsearch/common/util/IntArrays.java new file mode 100644 index 00000000000..1a1f91bbee0 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/IntArrays.java @@ -0,0 +1,66 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +/** Utility methods to work with {@link IntArray}s. */ +public class IntArrays { + + private IntArrays() {} + + /** Return a {@link IntArray} view over the provided array. */ + public static IntArray wrap(final int[] array) { + return new IntArray() { + + private void checkIndex(long index) { + if (index > Integer.MAX_VALUE) { + throw new IndexOutOfBoundsException(Long.toString(index)); + } + } + + @Override + public void set(long index, int value) { + checkIndex(index); + array[(int) index] = value; + } + + @Override + public int increment(long index, int inc) { + checkIndex(index); + return array[(int) index] += inc; + } + + @Override + public int get(long index) { + checkIndex(index); + return array[(int) index]; + } + }; + } + + /** Return a newly allocated {@link IntArray} of the given length or more. 
*/ + public static IntArray allocate(long length) { + if (length <= BigIntArray.DEFAULT_PAGE_SIZE) { + return wrap(new int[(int) length]); + } else { + return new BigIntArray(length); + } + } + +} diff --git a/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java b/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java index 1162163059d..9e8c49fa630 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java @@ -120,10 +120,10 @@ public abstract class BytesValues { public static class Single implements Iter { protected BytesRef value; - protected int ord; + protected long ord; protected boolean done; - public Single reset(BytesRef value, int ord) { + public Single reset(BytesRef value, long ord) { this.value = value; this.ord = ord; this.done = false; @@ -149,8 +149,8 @@ public abstract class BytesValues { static class Multi implements Iter { - protected int innerOrd; - protected int ord; + protected long innerOrd; + protected long ord; protected BytesValues.WithOrdinals withOrds; protected Ordinals.Docs.Iter ordsIter; protected final BytesRef scratch = new BytesRef(); @@ -226,7 +226,7 @@ public abstract class BytesValues { return ordinals; } - public BytesRef getValueByOrd(int ord) { + public BytesRef getValueByOrd(long ord) { return getValueScratchByOrd(ord, scratch); } @@ -247,7 +247,7 @@ public abstract class BytesValues { @Override public BytesRef getValue(int docId) { - final int ord = ordinals.getOrd(docId); + final long ord = ordinals.getOrd(docId); if (ord == 0) { return null; } @@ -268,7 +268,7 @@ public abstract class BytesValues { * result which will also be returned. If there is no value for this docId, the length will be 0. * Note, the bytes are not "safe". 
*/ - public abstract BytesRef getValueScratchByOrd(int ord, BytesRef ret); + public abstract BytesRef getValueScratchByOrd(long ord, BytesRef ret); public static class Empty extends WithOrdinals { @@ -277,7 +277,7 @@ public abstract class BytesValues { } @Override - public BytesRef getValueScratchByOrd(int ord, BytesRef ret) { + public BytesRef getValueScratchByOrd(long ord, BytesRef ret) { ret.length = 0; return ret; } diff --git a/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java b/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java index 69af179b8e3..4d0893c4c95 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java @@ -120,7 +120,7 @@ public abstract class DoubleValues { @Override public final double getValueMissing(int docId, double missingValue) { - final int ord = ordinals.getOrd(docId); + final long ord = ordinals.getOrd(docId); if (ord == 0) { return missingValue; } else { @@ -128,7 +128,7 @@ public abstract class DoubleValues { } } - public abstract double getValueByOrd(int ord); + public abstract double getValueByOrd(long ord); @Override public final Iter getIter(int docId) { @@ -184,8 +184,8 @@ public abstract class DoubleValues { static class Multi implements Iter { - private org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter ordsIter; - private int ord; + private Ordinals.Docs.Iter ordsIter; + private long ord; private WithOrdinals values; public Multi(WithOrdinals values) { diff --git a/src/main/java/org/elasticsearch/index/fielddata/LongValues.java b/src/main/java/org/elasticsearch/index/fielddata/LongValues.java index 91e3840fddc..7e19c60e076 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/LongValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/LongValues.java @@ -118,7 +118,7 @@ public abstract class LongValues { return getValueByOrd(ordinals.getOrd(docId)); } - public abstract long 
getValueByOrd(int ord); + public abstract long getValueByOrd(long ord); @Override public final Iter getIter(int docId) { @@ -127,7 +127,7 @@ public abstract class LongValues { @Override public final long getValueMissing(int docId, long missingValue) { - final int ord = ordinals.getOrd(docId); + final long ord = ordinals.getOrd(docId); if (ord == 0) { return missingValue; } else { @@ -185,7 +185,7 @@ public abstract class LongValues { static class Multi implements Iter { private org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter ordsIter; - private int ord; + private long ord; private WithOrdinals values; public Multi(WithOrdinals values) { diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java index fe01488f1c3..1d50b66a549 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java +++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java @@ -45,7 +45,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { /* Ords for each slot. @lucene.internal */ - final int[] ords; + final long[] ords; final SortMode sortMode; @@ -75,7 +75,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { /* Bottom ord (same as ords[bottomSlot] once bottomSlot is set). Cached for faster compares. @lucene.internal */ - int bottomOrd; + long bottomOrd; /* True if current bottom slot matches the current reader. 
@@ -92,7 +92,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { public BytesRefOrdValComparator(IndexFieldData.WithOrdinals indexFieldData, int numHits, SortMode sortMode) { this.indexFieldData = indexFieldData; this.sortMode = sortMode; - ords = new int[numHits]; + ords = new long[numHits]; values = new BytesRef[numHits]; readerGen = new int[numHits]; } @@ -100,7 +100,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { @Override public int compare(int slot1, int slot2) { if (readerGen[slot1] == readerGen[slot2]) { - return ords[slot1] - ords[slot2]; + return LongValuesComparator.compare(ords[slot1], ords[slot2]); } final BytesRef val1 = values[slot1]; @@ -207,7 +207,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { final int docOrd = (readerOrds[doc] & 0xFF); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; + return (int) bottomOrd - docOrd; } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in @@ -253,7 +253,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { final int docOrd = (readerOrds[doc] & 0xFFFF); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; + return (int) bottomOrd - docOrd; } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in @@ -299,7 +299,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { final int docOrd = readerOrds[doc]; if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; + return (int) bottomOrd - docOrd; } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in @@ -345,10 +345,10 @@ public final class BytesRefOrdValComparator 
extends FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; - final int docOrd = readerOrds.getOrd(doc); + final long docOrd = readerOrds.getOrd(doc); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; + return LongValuesComparator.compare(bottomOrd, docOrd); } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in @@ -361,7 +361,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { @Override public void copy(int slot, int doc) { - final int ord = readerOrds.getOrd(doc); + final long ord = readerOrds.getOrd(doc); ords[slot] = ord; if (ord == 0) { values[slot] = null; @@ -428,7 +428,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; } else { - final int index = binarySearch(termsIndex, bottomValue); + final long index = binarySearch(termsIndex, bottomValue); if (index < 0) { bottomOrd = -index - 2; bottomSameReader = false; @@ -448,15 +448,15 @@ public final class BytesRefOrdValComparator extends FieldComparator { return values[slot]; } - final protected static int binarySearch(BytesValues.WithOrdinals a, BytesRef key) { + final protected static long binarySearch(BytesValues.WithOrdinals a, BytesRef key) { return binarySearch(a, key, 1, a.ordinals().getNumOrds()); } - final protected static int binarySearch(BytesValues.WithOrdinals a, BytesRef key, int low, int high) { + final protected static long binarySearch(BytesValues.WithOrdinals a, BytesRef key, long low, long high) { assert a.getValueByOrd(high) == null | a.getValueByOrd(high) != null; // make sure we actually can get these values assert a.getValueByOrd(low) == null | a.getValueByOrd(low) != null; while (low <= high) { - int mid = (low + high) >>> 1; + long mid = (low + high) >>> 1; BytesRef midVal = a.getValueByOrd(mid); int cmp; 
if (midVal != null) { @@ -488,10 +488,10 @@ public final class BytesRefOrdValComparator extends FieldComparator { @Override public int compareBottom(int doc) throws IOException { - final int docOrd = getRelevantOrd(readerOrds, doc, sortMode); + final long docOrd = getRelevantOrd(readerOrds, doc, sortMode); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; + return LongValuesComparator.compare(bottomOrd, docOrd); } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in @@ -504,7 +504,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { @Override public void copy(int slot, int doc) throws IOException { - final int ord = getRelevantOrd(readerOrds, doc, sortMode); + final long ord = getRelevantOrd(readerOrds, doc, sortMode); ords[slot] = ord; if (ord == 0) { values[slot] = null; @@ -561,14 +561,14 @@ public final class BytesRefOrdValComparator extends FieldComparator { return relevantVal; } - static int getRelevantOrd(Ordinals.Docs readerOrds, int docId, SortMode sortMode) { + static long getRelevantOrd(Ordinals.Docs readerOrds, int docId, SortMode sortMode) { Ordinals.Docs.Iter iter = readerOrds.getIter(docId); - int currentVal = iter.next(); + long currentVal = iter.next(); if (currentVal == 0) { return 0; } - int relevantVal = currentVal; + long relevantVal = currentVal; while (true) { if (sortMode == SortMode.MAX) { if (currentVal > relevantVal) { diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/DocIdOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/DocIdOrdinals.java index a77ed9da675..53c4b078bb6 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/DocIdOrdinals.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/DocIdOrdinals.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.fielddata.ordinals; -import 
org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LongsRef; import org.elasticsearch.common.RamUsage; /** @@ -64,13 +64,13 @@ public class DocIdOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long getNumOrds() { return numDocs; } @Override - public int getMaxOrd() { - return numDocs + 1; + public long getMaxOrd() { + return 1L + numDocs; } @Override @@ -81,7 +81,7 @@ public class DocIdOrdinals implements Ordinals { public static class Docs implements Ordinals.Docs { private final DocIdOrdinals parent; - private final IntsRef intsScratch = new IntsRef(new int[1], 0, 1); + private final LongsRef longsScratch = new LongsRef(new long[1], 0, 1); private final SingleValueIter iter = new SingleValueIter(); public Docs(DocIdOrdinals parent) { @@ -99,12 +99,12 @@ public class DocIdOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long getNumOrds() { return parent.getNumOrds(); } @Override - public int getMaxOrd() { + public long getMaxOrd() { return parent.getMaxOrd(); } @@ -114,14 +114,14 @@ public class DocIdOrdinals implements Ordinals { } @Override - public int getOrd(int docId) { + public long getOrd(int docId) { return docId + 1; } @Override - public IntsRef getOrds(int docId) { - intsScratch.ints[0] = docId + 1; - return intsScratch; + public LongsRef getOrds(int docId) { + longsScratch.longs[0] = docId + 1; + return longsScratch; } @Override diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/EmptyOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/EmptyOrdinals.java index 0657c3cdb60..2aa86df6ca3 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/EmptyOrdinals.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/EmptyOrdinals.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.fielddata.ordinals; -import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LongsRef; /** */ @@ -57,12 +57,12 @@ public class 
EmptyOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long getNumOrds() { return 0; } @Override - public int getMaxOrd() { + public long getMaxOrd() { return 1; } @@ -74,7 +74,7 @@ public class EmptyOrdinals implements Ordinals { public static class Docs implements Ordinals.Docs { private final EmptyOrdinals parent; - public static final IntsRef EMPTY_INTS_REF = new IntsRef(); + public static final LongsRef EMPTY_LONGS_REF = new LongsRef(); public Docs(EmptyOrdinals parent) { this.parent = parent; @@ -91,12 +91,12 @@ public class EmptyOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long getNumOrds() { return 0; } @Override - public int getMaxOrd() { + public long getMaxOrd() { return 1; } @@ -106,13 +106,13 @@ public class EmptyOrdinals implements Ordinals { } @Override - public int getOrd(int docId) { + public long getOrd(int docId) { return 0; } @Override - public IntsRef getOrds(int docId) { - return EMPTY_INTS_REF; + public LongsRef getOrds(int docId) { + return EMPTY_LONGS_REF; } @Override diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiFlatArrayOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiFlatArrayOrdinals.java deleted file mode 100644 index 17332beeb2c..00000000000 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiFlatArrayOrdinals.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.fielddata.ordinals; - -import org.apache.lucene.util.IntsRef; -import org.elasticsearch.common.RamUsage; - -/** - * "Flat" multi valued ordinals, the first level array size is as the maximum - * values a docId has. Ordinals are populated in order from the first flat array - * value to the next. - */ -public final class MultiFlatArrayOrdinals implements Ordinals { - - // ordinals with value 0 indicates no value - private final int[][] ordinals; - private final int numDocs; - private final int numOrds; - private final int maxOrd; - - private long size = -1; - - public MultiFlatArrayOrdinals(int[][] ordinals, int numOrds) { - assert ordinals.length > 0; - this.ordinals = ordinals; - this.numDocs = ordinals[0].length; - this.numOrds = numOrds; - this.maxOrd = numOrds + 1; - } - - @Override - public boolean hasSingleArrayBackingStorage() { - return false; - } - - @Override - public Object getBackingStorage() { - return ordinals; - } - - @Override - public long getMemorySizeInBytes() { - if (size == -1) { - long size = 0; - size += RamUsage.NUM_BYTES_ARRAY_HEADER; // for the top level array - for (int[] ordinal : ordinals) { - size += RamUsage.NUM_BYTES_INT * ordinal.length + RamUsage.NUM_BYTES_ARRAY_HEADER; - } - this.size = size; - } - return size; - } - - @Override - public boolean isMultiValued() { - return true; - } - - @Override - public int getNumDocs() { - return numDocs; - } - - @Override - public int getNumOrds() { - return numOrds; - } - - @Override - public int getMaxOrd() { - return this.maxOrd; - } - 
- @Override - public Docs ordinals() { - return new Docs(this, ordinals); - } - - public static class Docs implements Ordinals.Docs { - - private final MultiFlatArrayOrdinals parent; - private final int[][] ordinals; - private final IterImpl iter; - - private final IntsRef intsScratch; - - public Docs(MultiFlatArrayOrdinals parent, int[][] ordinals) { - this.parent = parent; - this.ordinals = ordinals; - this.iter = new IterImpl(ordinals); - this.intsScratch = new IntsRef(new int[16], 0 , 16); - } - - @Override - public Ordinals ordinals() { - return this.parent; - } - - @Override - public int getNumDocs() { - return parent.getNumDocs(); - } - - @Override - public int getNumOrds() { - return parent.getNumOrds(); - } - - @Override - public int getMaxOrd() { - return parent.getMaxOrd(); - } - - @Override - public boolean isMultiValued() { - return true; - } - - @Override - public int getOrd(int docId) { - return ordinals[0][docId]; - } - - @Override - public IntsRef getOrds(int docId) { - intsScratch.offset = 0; - int i; - for (i = 0; i < ordinals.length; i++) { - int ordinal = ordinals[i][docId]; - if (ordinal == 0) { - if (i == 0) { - intsScratch.length = 0; - return intsScratch; - } - break; - } - intsScratch.grow(i+1); - intsScratch.ints[i] = ordinal; - } - intsScratch.length = i; - return intsScratch; - } - - @Override - public Iter getIter(int docId) { - return iter.reset(docId); - } - - public static class IterImpl implements Docs.Iter { - - private final int[][] ordinals; - private int docId; - private int i; - - public IterImpl(int[][] ordinals) { - this.ordinals = ordinals; - } - - public IterImpl reset(int docId) { - this.docId = docId; - this.i = 0; - return this; - } - - @Override - public int next() { - if (i >= ordinals.length) return 0; - return ordinals[i++][docId]; - } - } - } -} diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiOrdinals.java new 
file mode 100644 index 00000000000..09e0bb8951f --- /dev/null +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiOrdinals.java @@ -0,0 +1,219 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata.ordinals; + +import org.apache.lucene.util.Version; + +import org.elasticsearch.common.lucene.Lucene; + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.LongsRef; +import org.apache.lucene.util.packed.AppendingLongBuffer; +import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; +import org.apache.lucene.util.packed.PackedInts; +import org.elasticsearch.common.RamUsage; +import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter; + +/** {@link Ordinals} implementation which is efficient at storing field data ordinals for multi-valued or sparse fields. */ +public class MultiOrdinals implements Ordinals { + + // hard-coded in Lucene 4.3 but will be exposed in Lucene 4.4 + static { + assert Lucene.VERSION == Version.LUCENE_43; + } + private static final int OFFSETS_PAGE_SIZE = 1024; + + /** Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%. 
*/ + public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds) { + final int bitsPerOrd = PackedInts.bitsRequired(numOrds); + // Compute the worst-case number of bits per value for offsets in the worst case, eg. if no docs have a value at the + // beginning of the block and all docs have one at the end of the block + final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc; + final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc); + final int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign + final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset; + final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd; + return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes; + } + + private final boolean multiValued; + private final long numOrds; + private final MonotonicAppendingLongBuffer endOffsets; + private final AppendingLongBuffer ords; + + public MultiOrdinals(OrdinalsBuilder builder) { + multiValued = builder.getNumMultiValuesDocs() > 0; + numOrds = builder.getNumOrds(); + endOffsets = new MonotonicAppendingLongBuffer(); + ords = new AppendingLongBuffer(); + long lastEndOffset = 0; + for (int i = 0; i < builder.maxDoc(); ++i) { + final LongsRef docOrds = builder.docOrds(i); + final long endOffset = lastEndOffset + docOrds.length; + endOffsets.add(endOffset); + for (int j = 0; j < docOrds.length; ++j) { + ords.add(docOrds.longs[docOrds.offset + j] - 1); + } + lastEndOffset = endOffset; + } + assert endOffsets.size() == builder.maxDoc(); + assert ords.size() == builder.getTotalNumOrds() : ords.size() + " != " + builder.getTotalNumOrds(); + } + + @Override + public boolean hasSingleArrayBackingStorage() { + return false; + } + + @Override + public Object getBackingStorage() { + return null; + } + + @Override + public long getMemorySizeInBytes() { + return 
endOffsets.ramBytesUsed() + ords.ramBytesUsed(); + } + + @Override + public boolean isMultiValued() { + return multiValued; + } + + @Override + public int getNumDocs() { + return (int) endOffsets.size(); + } + + @Override + public long getNumOrds() { + return numOrds; + } + + @Override + public long getMaxOrd() { + return numOrds + 1; + } + + @Override + public Ordinals.Docs ordinals() { + return new MultiDocs(this); + } + + static class MultiDocs implements Ordinals.Docs { + + private final MultiOrdinals ordinals; + private final MonotonicAppendingLongBuffer endOffsets; + private final AppendingLongBuffer ords; + private final LongsRef longsScratch; + private final MultiIter iter; + + MultiDocs(MultiOrdinals ordinals) { + this.ordinals = ordinals; + this.endOffsets = ordinals.endOffsets; + this.ords = ordinals.ords; + this.longsScratch = new LongsRef(16); + this.iter = new MultiIter(ords); + } + + @Override + public Ordinals ordinals() { + return null; + } + + @Override + public int getNumDocs() { + return ordinals.getNumDocs(); + } + + @Override + public long getNumOrds() { + return ordinals.getNumOrds(); + } + + @Override + public long getMaxOrd() { + return ordinals.getMaxOrd(); + } + + @Override + public boolean isMultiValued() { + return ordinals.isMultiValued(); + } + + @Override + public long getOrd(int docId) { + final long startOffset = docId > 0 ? endOffsets.get(docId - 1) : 0; + final long endOffset = endOffsets.get(docId); + if (startOffset == endOffset) { + return 0L; // ord for missing values + } else { + return 1L + ords.get(startOffset); + } + } + + @Override + public LongsRef getOrds(int docId) { + final long startOffset = docId > 0 ? 
endOffsets.get(docId - 1) : 0; + final long endOffset = endOffsets.get(docId); + final int numValues = (int) (endOffset - startOffset); + if (longsScratch.length < numValues) { + longsScratch.longs = new long[ArrayUtil.oversize(numValues, RamUsage.NUM_BYTES_LONG)]; + } + for (int i = 0; i < numValues; ++i) { + longsScratch.longs[i] = 1L + ords.get(startOffset + i); + } + longsScratch.offset = 0; + longsScratch.length = numValues; + return longsScratch; + } + + @Override + public Iter getIter(int docId) { + final long startOffset = docId > 0 ? endOffsets.get(docId - 1) : 0; + final long endOffset = endOffsets.get(docId); + iter.offset = startOffset; + iter.endOffset = endOffset; + return iter; + } + + } + + static class MultiIter implements Iter { + + final AppendingLongBuffer ordinals; + long offset, endOffset; + + MultiIter(AppendingLongBuffer ordinals) { + this.ordinals = ordinals; + } + + @Override + public long next() { + if (offset >= endOffset) { + return 0L; + } else { + return 1L + ordinals.get(offset++); + } + } + + } + +} diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/Ordinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/Ordinals.java index 1c23e9b0cf9..c7d65e0f1ed 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/Ordinals.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/Ordinals.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.fielddata.ordinals; -import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LongsRef; /** * A thread safe ordinals abstraction. Ordinals can only be positive integers. @@ -54,13 +54,13 @@ public interface Ordinals { /** * The number of ordinals, excluding the "0" ordinal indicating a missing value. */ - int getNumOrds(); + long getNumOrds(); /** * Returns total unique ord count; this includes +1 for * the null ord (always 0). 
*/ - int getMaxOrd(); + long getMaxOrd(); /** * Returns a lightweight (non thread safe) view iterator of the ordinals. @@ -88,13 +88,13 @@ public interface Ordinals { /** * The number of ordinals, excluding the "0" ordinal (indicating a missing value). */ - int getNumOrds(); + long getNumOrds(); /** * Returns total unique ord count; this includes +1 for * the null ord (always 0). */ - int getMaxOrd(); + long getMaxOrd(); /** * Is one of the docs maps to more than one ordinal? @@ -105,13 +105,13 @@ public interface Ordinals { * The ordinal that maps to the relevant docId. If it has no value, returns * 0. */ - int getOrd(int docId); + long getOrd(int docId); /** * Returns an array of ordinals matching the docIds, with 0 length one * for a doc with no ordinals. */ - IntsRef getOrds(int docId); + LongsRef getOrds(int docId); /** * Returns an iterator of the ordinals that match the docId, with an @@ -128,7 +128,7 @@ public interface Ordinals { /** * Gets the next ordinal. Returning 0 if the iteration is exhausted. 
*/ - int next(); + long next(); } static class EmptyIter implements Iter { @@ -136,23 +136,23 @@ public interface Ordinals { public static EmptyIter INSTANCE = new EmptyIter(); @Override - public int next() { + public long next() { return 0; } } static class SingleValueIter implements Iter { - private int value; + private long value; - public SingleValueIter reset(int value) { + public SingleValueIter reset(long value) { this.value = value; return this; } @Override - public int next() { - int actual = value; + public long next() { + long actual = value; value = 0; return actual; } diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java index 235ff6b23c7..7b832d0fe0d 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java @@ -1,4 +1,3 @@ -package org.elasticsearch.index.fielddata.ordinals; /* * Licensed to ElasticSearch and Shay Banon under one * or more contributor license agreements. See the NOTICE file @@ -17,21 +16,21 @@ package org.elasticsearch.index.fielddata.ordinals; * specific language governing permissions and limitations * under the License. 
*/ + +package org.elasticsearch.index.fielddata.ordinals; + import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FilteredTermsEnum; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.*; -import org.apache.lucene.util.IntBlockPool.Allocator; -import org.apache.lucene.util.IntBlockPool.DirectAllocator; import org.apache.lucene.util.packed.GrowableWriter; import org.apache.lucene.util.packed.PackedInts; -import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.apache.lucene.util.packed.XPagedGrowableWriter; import org.elasticsearch.common.settings.Settings; import java.io.Closeable; import java.io.IOException; -import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; /** @@ -41,54 +40,251 @@ import java.util.Comparator; */ public final class OrdinalsBuilder implements Closeable { - private final int maxDoc; - private int[] mvOrds; - private GrowableWriter svOrds; + /** Default acceptable overhead ratio. {@link OrdinalsBuilder} memory usage is mostly transient so it is likely a better trade-off to + * trade memory for speed in order to resize less often. */ + public static final float DEFAULT_ACCEPTABLE_OVERHEAD_RATIO = PackedInts.FAST; - private int[] offsets; - private final IntBlockPool pool; - private final IntBlockPool.SliceWriter writer; - private final IntsRef intsRef = new IntsRef(1); - private final IntBlockPool.SliceReader reader; - private int currentOrd = 0; + /** The following structure is used to store ordinals. The idea is to store ords on levels of increasing sizes. Level 0 stores + * 1 value and 1 pointer to level 1. Level 1 stores 2 values and 1 pointer to level 2, ..., Level n stores 2**n values and + * 1 pointer to level n+1. If at some point an ordinal or a pointer has 0 as a value, this means that there are no remaining + * values. 
On the first level, ordinals.get(docId) is the first ordinal for docId or 0 if the document has no ordinals. On + * subsequent levels, the first 2^level slots are reserved and all have 0 as a value. + *

+     * Example for an index of 3 docs (O=ordinal, P = pointer)
+     * Level 0:
+     *   ordinals           [1] [4] [2]
+     *   nextLevelSlices    1  0  2
+     * Level 1:
+     *   ordinals           [0  0] [2  0] [3  4]
+     *   nextLevelSlices    0  0  1
+     * Level 2:
+     *   ordinals           [0  0  0  0] [5  0  0  0]
+     *   nextLevelSlices    0  0
+     * 
+ * On level 0, all documents have an ordinal: 0 has 1, 1 has 4 and 2 has 2 as a first ordinal, this means that we need to read + * nextLevelSlices to get the index of their ordinals on the next level. The entry for document 1 is 0, meaning that we have + * already read all its ordinals. On the contrary 0 and 2 have more ordinals which are stored at indices 1 and 2. Let's continue + * with document 2: it has 2 more ordinals on level 1: 3 and 4 and its next level index is 1 meaning that there are remaining + * ordinals on the next level. On level 2 at index 1, we can read [5 0 0 0] meaning that 5 is an ordinal as well, but the + * fact that it is followed by zeros means that there are no more ordinals. In the end, document 2 has 2, 3, 4 and 5 as ordinals. + * + * In addition to these structures, there is another array which stores the current position (level + slice + offset in the slice) + * in order to be able to append data in constant time. + */ + private static class OrdinalsStore { + + private static final int PAGE_SIZE = 1 << 12; + + /** Number of slots at level */ + private static int numSlots(int level) { + return 1 << level; + } + + private static int slotsMask(int level) { + return numSlots(level) - 1; + } + + /** Encode the position for the given level and offset. The idea is to encode the level using unary coding in the lower bits and + * then the offset in the higher bits. */ + private static long position(int level, long offset) { + assert level >= 1; + return (1 << (level - 1)) | (offset << level); + } + + /** Decode the level from an encoded position. */ + private static int level(long position) { + return 1 + Long.numberOfTrailingZeros(position); + } + + /** Decode the offset from the position. */ + private static long offset(long position, int level) { + return position >>> level; + } + + /** Get the ID of the slice given an offset. 
*/ + private static long sliceID(int level, long offset) { + return offset >>> level; + } + + /** Compute the first offset of the given slice. */ + private static long startOffset(int level, long slice) { + return slice << level; + } + + /** Compute the number of ordinals stored for a value given its current position. */ + private static int numOrdinals(int level, long offset) { + return (1 << level) + (int) (offset & slotsMask(level)); + } + + // Current position + private XPagedGrowableWriter positions; + // First level (0) of ordinals and pointers to the next level + private final GrowableWriter firstOrdinals; + private XPagedGrowableWriter firstNextLevelSlices; + // Ordinals and pointers for other levels, starting at 1 + private final XPagedGrowableWriter[] ordinals; + private final XPagedGrowableWriter[] nextLevelSlices; + private final int[] sizes; + + private final int startBitsPerValue; + private final float acceptableOverheadRatio; + + OrdinalsStore(int maxDoc, int startBitsPerValue, float acceptableOverheadRatio) { + this.startBitsPerValue = startBitsPerValue; + this.acceptableOverheadRatio = acceptableOverheadRatio; + positions = new XPagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio); + firstOrdinals = new GrowableWriter(startBitsPerValue, maxDoc, acceptableOverheadRatio); + // over allocate in order to never worry about the array sizes, 24 entries would allow to store several millions of ordinals per doc... + ordinals = new XPagedGrowableWriter[24]; + nextLevelSlices = new XPagedGrowableWriter[24]; + sizes = new int[24]; + Arrays.fill(sizes, 1); // reserve the 1st slice on every level + } + + /** Allocate a new slice and return its ID. 
*/ + private long newSlice(int level) { + final long newSlice = sizes[level]++; + // Lazily allocate ordinals + if (ordinals[level] == null) { + ordinals[level] = new XPagedGrowableWriter(8L * numSlots(level), PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio); + } else { + ordinals[level] = ordinals[level].grow(sizes[level] * numSlots(level)); + if (nextLevelSlices[level] != null) { + nextLevelSlices[level] = nextLevelSlices[level].grow(sizes[level]); + } + } + return newSlice; + } + + public int addOrdinal(int docID, long ordinal) { + final long position = positions.get(docID); + + if (position == 0L) { // on the first level + // 0 or 1 ordinal + if (firstOrdinals.get(docID) == 0L) { + firstOrdinals.set(docID, ordinal); + return 1; + } else { + final long newSlice = newSlice(1); + if (firstNextLevelSlices == null) { + firstNextLevelSlices = new XPagedGrowableWriter(firstOrdinals.size(), PAGE_SIZE, 3, acceptableOverheadRatio); + } + firstNextLevelSlices.set(docID, newSlice); + final long offset = startOffset(1, newSlice); + ordinals[1].set(offset, ordinal); + positions.set(docID, position(1, offset)); // current position is on the 1st level and not allocated yet + return 2; + } + } else { + int level = level(position); + long offset = offset(position, level); + assert offset != 0L; + if (((offset + 1) & slotsMask(level)) == 0L) { + // reached the end of the slice, allocate a new one on the next level + final long newSlice = newSlice(level + 1); + if (nextLevelSlices[level] == null) { + nextLevelSlices[level] = new XPagedGrowableWriter(sizes[level], PAGE_SIZE, 1, acceptableOverheadRatio); + } + nextLevelSlices[level].set(sliceID(level, offset), newSlice); + ++level; + offset = startOffset(level, newSlice); + assert (offset & slotsMask(level)) == 0L; + } else { + // just go to the next slot + ++offset; + } + ordinals[level].set(offset, ordinal); + final long newPosition = position(level, offset); + positions.set(docID, newPosition); + return numOrdinals(level, 
offset); + } + } + + public void appendOrdinals(int docID, LongsRef ords) { + // First level + final long firstOrd = firstOrdinals.get(docID); + if (firstOrd == 0L) { + return; + } + ords.longs = ArrayUtil.grow(ords.longs, ords.offset + ords.length + 1); + ords.longs[ords.offset + ords.length++] = firstOrd; + if (firstNextLevelSlices == null) { + return; + } + long sliceID = firstNextLevelSlices.get(docID); + if (sliceID == 0L) { + return; + } + // Other levels + for (int level = 1; ; ++level) { + final int numSlots = numSlots(level); + ords.longs = ArrayUtil.grow(ords.longs, ords.offset + ords.length + numSlots); + final long offset = startOffset(level, sliceID); + for (int j = 0; j < numSlots; ++j) { + final long ord = ordinals[level].get(offset + j); + if (ord == 0L) { + return; + } + ords.longs[ords.offset + ords.length++] = ord; + } + if (nextLevelSlices[level] == null) { + return; + } + sliceID = nextLevelSlices[level].get(sliceID); + if (sliceID == 0L) { + return; + } + } + } + + } + + private final int maxDoc; + private long currentOrd = 0; private int numDocsWithValue = 0; private int numMultiValuedDocs = 0; private int totalNumOrds = 0; - public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, Allocator allocator, float acceptableOverheadRatio) throws IOException { + private OrdinalsStore ordinals; + private final LongsRef spare; + + public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException { this.maxDoc = maxDoc; - if (preDefineBitsRequired) { - int numTerms = (int) terms.size(); - if (numTerms == -1) { - svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio); - } else { - svOrds = new GrowableWriter(PackedInts.bitsRequired(numTerms), maxDoc, acceptableOverheadRatio); - } - } else { - svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio); + int startBitsPerValue = 8; + if (numTerms >= 0) { + startBitsPerValue = PackedInts.bitsRequired(numTerms); } - pool = new 
IntBlockPool(allocator); - reader = new IntBlockPool.SliceReader(pool); - writer = new IntBlockPool.SliceWriter(pool); + ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio); + spare = new LongsRef(); } + public OrdinalsBuilder(int maxDoc, float acceptableOverheadRatio) throws IOException { + this(-1, maxDoc, acceptableOverheadRatio); + } + public OrdinalsBuilder(int maxDoc) throws IOException { - this(null, false, maxDoc, PackedInts.DEFAULT); + this(maxDoc, DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); } - public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, float acceptableOverheadRatio) throws IOException { - this(terms, preDefineBitsRequired, maxDoc, new DirectAllocator(), acceptableOverheadRatio); + /** + * Returns a shared {@link LongsRef} instance for the given doc ID holding all ordinals associated with it. + */ + public LongsRef docOrds(int docID) { + spare.offset = spare.length = 0; + ordinals.appendOrdinals(docID, spare); + return spare; } - public OrdinalsBuilder(Terms terms, int maxDoc, float acceptableOverheadRatio) throws IOException { - this(terms, true, maxDoc, new DirectAllocator(), acceptableOverheadRatio); + /** Return a {@link PackedInts.Reader} instance mapping every doc ID to its first ordinal if it exists and 0 otherwise. */ + public PackedInts.Reader getFirstOrdinals() { + return ordinals.firstOrdinals; } /** * Advances the {@link OrdinalsBuilder} to the next ordinal and * return the current ordinal. */ - public int nextOrdinal() { + public long nextOrdinal() { return ++currentOrd; } @@ -96,7 +292,7 @@ public final class OrdinalsBuilder implements Closeable { * Retruns the current ordinal or 0 if this build has not been advanced via * {@link #nextOrdinal()}. 
*/ - public int currentOrdinal() { + public long currentOrdinal() { return currentOrd; } @@ -105,42 +301,11 @@ public final class OrdinalsBuilder implements Closeable { */ public OrdinalsBuilder addDoc(int doc) { totalNumOrds++; - if (svOrds != null) { - int docsOrd = (int) svOrds.get(doc); - if (docsOrd == 0) { - svOrds.set(doc, currentOrd); - numDocsWithValue++; - } else { - // Rebuilding ords that supports mv based on sv ords. - mvOrds = new int[maxDoc]; - for (int docId = 0; docId < maxDoc; docId++) { - mvOrds[docId] = (int) svOrds.get(docId); - } - svOrds = null; - } - } - - if (mvOrds != null) { - int docsOrd = mvOrds[doc]; - if (docsOrd == 0) { - mvOrds[doc] = currentOrd; - numDocsWithValue++; - } else if (docsOrd > 0) { - numMultiValuedDocs++; - int offset = writer.startNewSlice(); - writer.writeInt(docsOrd); - writer.writeInt(currentOrd); - if (offsets == null) { - offsets = new int[mvOrds.length]; - } - offsets[doc] = writer.getCurrentOffset(); - mvOrds[doc] = (-1 * offset) - 1; - } else { - assert offsets != null; - writer.reset(offsets[doc]); - writer.writeInt(currentOrd); - offsets[doc] = writer.getCurrentOffset(); - } + final int numValues = ordinals.addOrdinal(doc, currentOrd); + if (numValues == 1) { + ++numDocsWithValue; + } else if (numValues == 2) { + ++numMultiValuedDocs; } return this; } @@ -149,7 +314,7 @@ public final class OrdinalsBuilder implements Closeable { * Returns true iff this builder contains a document ID that is associated with more than one ordinal. Otherwise false; */ public boolean isMultiValued() { - return offsets != null; + return numMultiValuedDocs > 0; } /** @@ -183,7 +348,7 @@ public final class OrdinalsBuilder implements Closeable { /** * Returns the number of distinct ordinals in this builder. 
*/ - public int getNumOrds() { + public long getNumOrds() { return currentOrd; } @@ -196,18 +361,9 @@ public final class OrdinalsBuilder implements Closeable { return null; } final FixedBitSet bitSet = new FixedBitSet(maxDoc); - if (svOrds != null) { - for (int docId = 0; docId < maxDoc; docId++) { - int ord = (int) svOrds.get(docId); - if (ord != 0) { - bitSet.set(docId); - } - } - } else { - for (int docId = 0; docId < maxDoc; docId++) { - if (mvOrds[docId] != 0) { - bitSet.set(docId); - } + for (int docID = 0; docID < maxDoc; ++docID) { + if (ordinals.firstOrdinals.get(docID) != 0) { + bitSet.set(docID); } } return bitSet; @@ -217,72 +373,15 @@ public final class OrdinalsBuilder implements Closeable { * Builds an {@link Ordinals} instance from the builders current state. */ public Ordinals build(Settings settings) { - if (numMultiValuedDocs == 0) { - return new SinglePackedOrdinals(svOrds.getMutable(), getNumOrds()); - } - final String multiOrdinals = settings.get("multi_ordinals", "sparse"); - if ("flat".equals(multiOrdinals)) { - final ArrayList ordinalBuffer = new ArrayList(); - for (int i = 0; i < mvOrds.length; i++) { - final IntsRef docOrds = docOrds(i); - while (ordinalBuffer.size() < docOrds.length) { - ordinalBuffer.add(new int[mvOrds.length]); - } - - for (int j = docOrds.offset; j < docOrds.offset+docOrds.length; j++) { - ordinalBuffer.get(j)[i] = docOrds.ints[j]; - } - } - int[][] nativeOrdinals = new int[ordinalBuffer.size()][]; - for (int i = 0; i < nativeOrdinals.length; i++) { - nativeOrdinals[i] = ordinalBuffer.get(i); - } - return new MultiFlatArrayOrdinals(nativeOrdinals, getNumOrds()); - } else if ("sparse".equals(multiOrdinals)) { - int multiOrdinalsMaxDocs = settings.getAsInt("multi_ordinals_max_docs", 16777216 /* Equal to 64MB per storeage array */); - return new SparseMultiArrayOrdinals(this, multiOrdinalsMaxDocs); + final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.COMPACT); + if 
(numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getNumOrds())) { + // MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields + return new MultiOrdinals(this); } else { - throw new ElasticSearchIllegalArgumentException("no applicable fielddata multi_ordinals value, got [" + multiOrdinals + "]"); + return new SinglePackedOrdinals(this, acceptableOverheadRatio); } } - /** - * Returns a shared {@link IntsRef} instance for the given doc ID holding all ordinals associated with it. - */ - public IntsRef docOrds(int doc) { - if (svOrds != null) { - int docsOrd = (int) svOrds.get(doc); - intsRef.offset = 0; - if (docsOrd == 0) { - intsRef.length = 0; - } else if (docsOrd > 0) { - intsRef.ints[0] = docsOrd; - intsRef.length = 1; - } - } else { - int docsOrd = mvOrds[doc]; - intsRef.offset = 0; - if (docsOrd == 0) { - intsRef.length = 0; - } else if (docsOrd > 0) { - intsRef.ints[0] = mvOrds[doc]; - intsRef.length = 1; - } else { - assert offsets != null; - reader.reset(-1 * (mvOrds[doc] + 1), offsets[doc]); - int pos = 0; - while (!reader.endOfSlice()) { - if (intsRef.ints.length <= pos) { - intsRef.ints = ArrayUtil.grow(intsRef.ints, pos + 1); - } - intsRef.ints[pos++] = reader.readInt(); - } - intsRef.length = pos; - } - } - return intsRef; - } - /** * Returns the maximum document ID this builder can associate with an ordinal */ @@ -364,7 +463,6 @@ public final class OrdinalsBuilder implements Closeable { */ @Override public void close() throws IOException { - pool.reset(true, false); - offsets = null; + ordinals = null; } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/PositiveIntPool.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/PositiveIntPool.java deleted file mode 100644 index 48e4fb0f811..00000000000 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/PositiveIntPool.java +++ /dev/null @@ -1,146 +0,0 @@ -package 
org.elasticsearch.index.fielddata.ordinals; - -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.IntsRef; -import org.elasticsearch.ElasticSearchIllegalArgumentException; -import org.elasticsearch.common.RamUsage; - -/** - * An efficient store for positive integer slices. This pool uses multiple - * sliced arrays to hold integers in int array pages rather than an object based - * datastructures. - */ -final class PositiveIntPool { - // TODO it might be useful to store the size of the slices in a sep - // datastructure rather than useing a negative value to donate this. - private final int blockShift; - private final int blockMask; - private final int blockSize; - /** - * array of buffers currently used in the pool. 
Buffers are allocated if - * needed don't modify this outside of this class - */ - private int[][] buffers = new int[10][]; - - /** - * index into the buffers array pointing to the current buffer used as the - * head - */ - private int bufferUpto = -1; - /** Pointer to the current position in head buffer */ - private int intUpto; - /** Current head buffer */ - private int[] buffer; - /** Current head offset */ - private int intOffset; - - - /** - * Creates a new {@link PositiveIntPool} with the given blockShift. - * - * @param blockShift - * the n-the power of two indicating the size of each block in - * the paged datastructure. BlockSize = 1 << blockShift - */ - public PositiveIntPool(int blockShift) { - this.blockShift = blockShift; - this.blockSize = 1 << blockShift; - this.blockMask = blockSize - 1; - this.intUpto = blockSize; - this.intOffset = -blockSize; - } - - /** - * Adds all integers in the given slices and returns the positive offset - * into the datastructure to retrive this slice. - */ - public int put(IntsRef slice) { - if ( slice.length > blockSize) { - throw new ElasticSearchIllegalArgumentException("Can not store slices greater or equal to: " + blockSize); - } - if ((intUpto + slice.length) > blockSize) { - nextBuffer(); - } - final int relativeOffset = intUpto; - System.arraycopy(slice.ints, slice.offset, buffer, relativeOffset, slice.length); - intUpto += slice.length; - buffer[intUpto - 1] *= -1; // mark as end - return relativeOffset + intOffset; - } - - /** - * Returns the first value of the slice stored at the given offset. - *

- * Note: the slice length must be greater than one otherwise the returned - * value is the negative complement of the actual value - *

- */ - public int getFirstFromOffset(int offset) { - final int blockOffset = offset >> blockShift; - final int relativeOffset = offset & blockMask; - final int[] currentBuffer = buffers[blockOffset]; - assert currentBuffer[relativeOffset] >= 0; - return currentBuffer[relativeOffset]; - } - - /** - * Retrieves a previously stored slice from the pool. - * - * @param slice the slice to fill - * @param offset the offset where the slice is stored - */ - public void fill(IntsRef slice, int offset) { - final int blockOffset = offset >> blockShift; - final int relativeOffset = offset & blockMask; - final int[] currentBuffer = buffers[blockOffset]; - slice.offset = 0; - slice.length = 0; - for (int i = relativeOffset; i < currentBuffer.length; i++) { - slice.length++; - if (currentBuffer[i] < 0) { - break; - } - - } - if (slice.length != 0) { - slice.ints = ArrayUtil.grow(slice.ints, slice.length); - System.arraycopy(currentBuffer, relativeOffset, slice.ints, 0, slice.length); - slice.ints[slice.length-1] *= -1; - } - } - - public long getMemorySizeInBytes() { - return ((bufferUpto + 1) * blockSize * RamUsage.NUM_BYTES_INT) + ((bufferUpto + 1) * RamUsage.NUM_BYTES_ARRAY_HEADER); - } - - private void nextBuffer() { - if (1 + bufferUpto == buffers.length) { - int[][] newBuffers = new int[(int) (buffers.length * 1.5)][]; - System.arraycopy(buffers, 0, newBuffers, 0, buffers.length); - buffers = newBuffers; - } - buffer = buffers[1 + bufferUpto] = new int[blockSize]; - bufferUpto++; - intUpto = 0; - intOffset += blockSize; - } - -} diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/SinglePackedOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/SinglePackedOrdinals.java index 175b5ec7950..8647699ddcd 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/SinglePackedOrdinals.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/SinglePackedOrdinals.java @@ -19,7 +19,7 @@ package 
org.elasticsearch.index.fielddata.ordinals; -import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.common.RamUsage; @@ -29,15 +29,19 @@ public class SinglePackedOrdinals implements Ordinals { // ordinals with value 0 indicates no value private final PackedInts.Reader reader; - private final int numOrds; - private final int maxOrd; + private final long numOrds; + private final long maxOrd; private long size = -1; - public SinglePackedOrdinals(PackedInts.Reader reader, int numOrds) { + public SinglePackedOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) { + assert builder.getNumMultiValuesDocs() == 0; + this.numOrds = builder.getNumOrds(); + this.maxOrd = builder.getNumOrds() + 1; + // We don't reuse the builder as-is because it might have been built with a higher overhead ratio + final PackedInts.Mutable reader = PackedInts.getMutable(builder.maxDoc(), PackedInts.bitsRequired(getNumOrds()), acceptableOverheadRatio); + PackedInts.copy(builder.getFirstOrdinals(), 0, reader, 0, builder.maxDoc(), 8 * 1024); this.reader = reader; - this.numOrds = numOrds; - this.maxOrd = numOrds + 1; } @Override @@ -72,12 +76,12 @@ public class SinglePackedOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long getNumOrds() { return numOrds; } @Override - public int getMaxOrd() { + public long getMaxOrd() { return maxOrd; } @@ -91,7 +95,7 @@ public class SinglePackedOrdinals implements Ordinals { private final SinglePackedOrdinals parent; private final PackedInts.Reader reader; - private final IntsRef intsScratch = new IntsRef(1); + private final LongsRef longsScratch = new LongsRef(1); private final SingleValueIter iter = new SingleValueIter(); public Docs(SinglePackedOrdinals parent, PackedInts.Reader reader) { @@ -110,12 +114,12 @@ public class SinglePackedOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long 
getNumOrds() { return parent.getNumOrds(); } @Override - public int getMaxOrd() { + public long getMaxOrd() { return parent.getMaxOrd(); } @@ -125,21 +129,21 @@ public class SinglePackedOrdinals implements Ordinals { } @Override - public int getOrd(int docId) { - return (int) reader.get(docId); + public long getOrd(int docId) { + return reader.get(docId); } @Override - public IntsRef getOrds(int docId) { - final int ordinal = (int) reader.get(docId); + public LongsRef getOrds(int docId) { + final long ordinal = reader.get(docId); if (ordinal == 0) { - intsScratch.length = 0; + longsScratch.length = 0; } else { - intsScratch.offset = 0; - intsScratch.length = 1; - intsScratch.ints[0] = ordinal; + longsScratch.offset = 0; + longsScratch.length = 1; + longsScratch.longs[0] = ordinal; } - return intsScratch; + return longsScratch; } @Override diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/SparseMultiArrayOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/SparseMultiArrayOrdinals.java deleted file mode 100644 index 55cd3e5384f..00000000000 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/SparseMultiArrayOrdinals.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.fielddata.ordinals; - -import org.apache.lucene.util.IntsRef; -import org.elasticsearch.common.RamUsage; - -/** - * Ordinals implementation that stores the ordinals into sparse fixed arrays. - *

- * This prevents large ordinal arrays that are created in for example {@link MultiFlatArrayOrdinals} when - * only a few documents have a lot of terms per field. - */ -public final class SparseMultiArrayOrdinals implements Ordinals { - - private final int[] lookup; - private final PositiveIntPool pool; - private final int numOrds; - private final int maxOrd; - private final int numDocs; - private long size = -1; - - public SparseMultiArrayOrdinals(OrdinalsBuilder builder, int maxSize) { - int blockShift = Math.min(floorPow2(builder.getTotalNumOrds() << 1), floorPow2(maxSize)); - this.pool = new PositiveIntPool(Math.max(4, blockShift)); - this.numDocs = builder.maxDoc(); - - - this.lookup = new int[numDocs]; - this.numOrds = builder.getNumOrds(); - this.maxOrd = numOrds + 1; - IntsRef spare; - for (int doc = 0; doc < numDocs; doc++) { - spare = builder.docOrds(doc); - final int size = spare.length; - if (size == 0) { - lookup[doc] = 0; - } else if (size == 1) { - lookup[doc] = spare.ints[spare.offset]; - } else { - int offset = pool.put(spare); - lookup[doc] = -(offset) - 1; - } - } - } - - private static int floorPow2(int number) { - return 31 - Integer.numberOfLeadingZeros(number); - } - - @Override - public boolean hasSingleArrayBackingStorage() { - return false; - } - - @Override - public Object getBackingStorage() { - return null; - } - - @Override - public long getMemorySizeInBytes() { - if (size == -1) { - size = (RamUsage.NUM_BYTES_ARRAY_HEADER + (RamUsage.NUM_BYTES_INT * lookup.length)) + pool.getMemorySizeInBytes(); - } - return size; - } - - @Override - public boolean isMultiValued() { - return true; - } - - @Override - public int getNumDocs() { - return numDocs; - } - - @Override - public int getNumOrds() { - return numOrds; - } - - @Override - public int getMaxOrd() { - return maxOrd; - } - - @Override - public Docs ordinals() { - return new Docs(this, lookup, pool); - } - - static class Docs implements Ordinals.Docs { - - private final 
SparseMultiArrayOrdinals parent; - private final int[] lookup; - - private final IterImpl iter; - private final PositiveIntPool pool; - private final IntsRef spare = new IntsRef(1); - - public Docs(SparseMultiArrayOrdinals parent, int[] lookup, PositiveIntPool pool) { - this.parent = parent; - this.lookup = lookup; - this.pool = pool; - this.iter = new IterImpl(lookup, pool); - } - - @Override - public Ordinals ordinals() { - return this.parent; - } - - @Override - public int getNumDocs() { - return parent.getNumDocs(); - } - - @Override - public int getNumOrds() { - return parent.getNumOrds(); - } - - @Override - public int getMaxOrd() { - return parent.getMaxOrd(); - } - - @Override - public boolean isMultiValued() { - return true; - } - - @Override - public int getOrd(int docId) { - int pointer = lookup[docId]; - if (pointer < 0) { - return pool.getFirstFromOffset(-(pointer + 1)); - } - return pointer; - } - - @Override - public IntsRef getOrds(int docId) { - spare.offset = 0; - int pointer = lookup[docId]; - if (pointer == 0) { - spare.length = 0; - } else if (pointer > 0) { - spare.length = 1; - spare.ints[0] = pointer; - return spare; - } else { - pool.fill(spare, -(pointer + 1)); - return spare; - } - return spare; - } - - @Override - public Iter getIter(int docId) { - return iter.reset(docId); - } - - class IterImpl implements Docs.Iter { - private final int[] lookup; - private final PositiveIntPool pool; - private final IntsRef slice = new IntsRef(1); - private int valuesOffset; - - public IterImpl(int[] lookup, PositiveIntPool pool) { - this.lookup = lookup; - this.pool = pool; - } - - public IterImpl reset(int docId) { - final int pointer = lookup[docId]; - if (pointer < 0) { - pool.fill(slice, -(pointer + 1)); - } else { - slice.ints[0] = pointer; - slice.offset = 0; - slice.length = 1; - } - valuesOffset = 0; - return this; - } - - @Override - public int next() { - if (valuesOffset >= slice.length) { - return 0; - } - return slice.ints[slice.offset + 
(valuesOffset++)]; - } - } - } -} diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayAtomicFieldData.java index de603b5b1b4..6b0949d5136 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayAtomicFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayAtomicFieldData.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.RamUsage; +import org.elasticsearch.common.util.BigDoubleArrayList; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.ordinals.Ordinals; @@ -30,14 +31,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData public static final DoubleArrayAtomicFieldData EMPTY = new Empty(); - protected final double[] values; private final int numDocs; protected long size = -1; - public DoubleArrayAtomicFieldData(double[] values, int numDocs) { + public DoubleArrayAtomicFieldData(int numDocs) { super(true); - this.values = values; this.numDocs = numDocs; } @@ -53,7 +52,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData static class Empty extends DoubleArrayAtomicFieldData { Empty() { - super(null, 0); + super(0); } @Override @@ -94,10 +93,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData public static class WithOrdinals extends DoubleArrayAtomicFieldData { + private final BigDoubleArrayList values; private final Ordinals ordinals; - public WithOrdinals(double[] values, int numDocs, Ordinals ordinals) { - super(values, numDocs); + public WithOrdinals(BigDoubleArrayList values, int numDocs, Ordinals ordinals) { + super(numDocs); + this.values = values; this.ordinals = ordinals; } @@ -114,7 +115,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData 
@Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + +RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE) + ordinals.getMemorySizeInBytes(); + size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes(); } return size; } @@ -133,31 +134,31 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals { - private final double[] values; + private final BigDoubleArrayList values; - LongValues(double[] values, Ordinals.Docs ordinals) { + LongValues(BigDoubleArrayList values, Ordinals.Docs ordinals) { super(ordinals); this.values = values; } @Override - public final long getValueByOrd(int ord) { - return (long) values[ord]; + public final long getValueByOrd(long ord) { + return (long) values.get(ord); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals { - private final double[] values; + private final BigDoubleArrayList values; - DoubleValues(double[] values, Ordinals.Docs ordinals) { + DoubleValues(BigDoubleArrayList values, Ordinals.Docs ordinals) { super(ordinals); this.values = values; } @Override - public double getValueByOrd(int ord) { - return values[ord]; + public double getValueByOrd(long ord) { + return values.get(ord); } } } @@ -168,10 +169,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData */ public static class SingleFixedSet extends DoubleArrayAtomicFieldData { + private final BigDoubleArrayList values; private final FixedBitSet set; - public SingleFixedSet(double[] values, int numDocs, FixedBitSet set) { - super(values, numDocs); + public SingleFixedSet(BigDoubleArrayList values, int numDocs, FixedBitSet set) { + super(numDocs); + this.values = values; this.set = set; } @@ -188,7 
+191,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData @Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE) + (set.getBits().length * RamUsage.NUM_BYTES_LONG); + size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + (set.getBits().length * RamUsage.NUM_BYTES_LONG); } return size; } @@ -205,10 +208,10 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData static class LongValues extends org.elasticsearch.index.fielddata.LongValues { - private final double[] values; + private final BigDoubleArrayList values; private final FixedBitSet set; - LongValues(double[] values, FixedBitSet set) { + LongValues(BigDoubleArrayList values, FixedBitSet set) { super(false); this.values = values; this.set = set; @@ -221,16 +224,16 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData @Override public long getValue(int docId) { - return (long) values[docId]; + return (long) values.get(docId); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues { - private final double[] values; + private final BigDoubleArrayList values; private final FixedBitSet set; - DoubleValues(double[] values, FixedBitSet set) { + DoubleValues(BigDoubleArrayList values, FixedBitSet set) { super(false); this.values = values; this.set = set; @@ -243,7 +246,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData @Override public double getValue(int docId) { - return values[docId]; + return values.get(docId); } } @@ -254,12 +257,15 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData */ public static class Single extends DoubleArrayAtomicFieldData { + private final BigDoubleArrayList values; + /** * Note, here, we assume that there is no offset by 1 from docId, so position 0 * is the value for docId 0. 
*/ - public Single(double[] values, int numDocs) { - super(values, numDocs); + public Single(BigDoubleArrayList values, int numDocs) { + super(numDocs); + this.values = values; } @Override @@ -275,7 +281,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData @Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE); + size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes(); } return size; } @@ -292,32 +298,32 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense { - private final double[] values; + private final BigDoubleArrayList values; - LongValues(double[] values) { + LongValues(BigDoubleArrayList values) { super(false); this.values = values; } @Override public long getValue(int docId) { - return (long) values[docId]; + return (long) values.get(docId); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense { - private final double[] values; + private final BigDoubleArrayList values; - DoubleValues(double[] values) { + DoubleValues(BigDoubleArrayList values) { super(false); this.values = values; } @Override public double getValue(int docId) { - return values[docId]; + return values.get(docId); } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java index 8747fc2e214..dbd5cf030a9 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.fielddata.plain; -import gnu.trove.list.array.TDoubleArrayList; import org.apache.lucene.index.AtomicReader; import 
org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Terms; @@ -27,11 +26,11 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.BigDoubleArrayList; import org.elasticsearch.index.Index; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource; @@ -49,7 +48,7 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) { return new DoubleArrayIndexFieldData(index, indexSettings, fieldNames, type, cache); } } @@ -92,11 +91,11 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData fst; @@ -104,18 +102,17 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals fstEnum = new BytesRefFSTEnum(fst); - int[] hashes = new int[ordinals.getMaxOrd()]; - InputOutput next; + BigIntArray hashes = new BigIntArray(ordinals.getMaxOrd()); // we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support // empty strings twice. ie. them merge fails for long output. 
- hashes[0] = new BytesRef().hashCode(); - int i = 1; + hashes.set(0, new BytesRef().hashCode()); try { - while ((next = fstEnum.next()) != null) { - hashes[i++] = next.input.hashCode(); + for (long i = 1, maxOrd = ordinals.getMaxOrd(); i < maxOrd; ++i) { + hashes.set(i, fstEnum.next().input.hashCode()); } - } catch (IOException ex) { - //bogus + assert fstEnum.next() == null; + } catch (IOException e) { + throw new AssertionError("Cannot happen", e); } this.hashes = hashes; } @@ -141,7 +138,7 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals fst, Docs ordinals, int[] hashes) { + SingleHashed(FST fst, Docs ordinals, BigIntArray hashes) { super(fst, ordinals); this.hashes = hashes; } @@ -188,16 +185,16 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals fst, Docs ordinals, int[] hashes) { + MultiHashed(FST fst, Docs ordinals, BigIntArray hashes) { super(fst, ordinals); this.hashes = hashes; } @@ -230,16 +227,16 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals fstBuilder = new org.apache.lucene.util.fst.Builder(INPUT_TYPE.BYTE1, outputs); final IntsRef scratch = new IntsRef(); - boolean preDefineBitsRequired = regex == null && frequency == null; - final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT); - OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio); + final long numTerms; + if (regex == null && frequency == null) { + numTerms = terms.size(); + } else { + numTerms = -1; + } + final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); + OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio); try { // we don't store an ord 0 in the FST since we could have an empty string in 
there and FST don't support @@ -75,7 +79,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData 0; fstBuilder.add(Util.toIntsRef(term, scratch), (long)termOrd); docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE); diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayAtomicFieldData.java index 9e1106e625e..c4048d77558 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayAtomicFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayAtomicFieldData.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.RamUsage; +import org.elasticsearch.common.util.BigFloatArrayList; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.ordinals.Ordinals; @@ -30,14 +31,12 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { public static final FloatArrayAtomicFieldData EMPTY = new Empty(); - protected final float[] values; private final int numDocs; protected long size = -1; - public FloatArrayAtomicFieldData(float[] values, int numDocs) { + public FloatArrayAtomicFieldData(int numDocs) { super(true); - this.values = values; this.numDocs = numDocs; } @@ -53,7 +52,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { static class Empty extends FloatArrayAtomicFieldData { Empty() { - super(null, 0); + super(0); } @Override @@ -95,9 +94,11 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { public static class WithOrdinals extends FloatArrayAtomicFieldData { private final Ordinals ordinals; + private final BigFloatArrayList values; - public WithOrdinals(float[] values, int numDocs, Ordinals ordinals) { - super(values, numDocs); + public WithOrdinals(BigFloatArrayList values, int 
numDocs, Ordinals ordinals) { + super(numDocs); + this.values = values; this.ordinals = ordinals; } @@ -114,7 +115,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { @Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT) + ordinals.getMemorySizeInBytes(); + size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes(); } return size; } @@ -131,31 +132,31 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals { - private final float[] values; + private final BigFloatArrayList values; - LongValues(float[] values, Ordinals.Docs ordinals) { + LongValues(BigFloatArrayList values, Ordinals.Docs ordinals) { super(ordinals); this.values = values; } @Override - public long getValueByOrd(int ord) { - return (long) values[ord]; + public long getValueByOrd(long ord) { + return (long) values.get(ord); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals { - private final float[] values; + private final BigFloatArrayList values; - DoubleValues(float[] values, Ordinals.Docs ordinals) { + DoubleValues(BigFloatArrayList values, Ordinals.Docs ordinals) { super(ordinals); this.values = values; } @Override - public double getValueByOrd(int ord) { - return values[ord]; + public double getValueByOrd(long ord) { + return values.get(ord); } } } @@ -166,10 +167,12 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { */ public static class SingleFixedSet extends FloatArrayAtomicFieldData { + private final BigFloatArrayList values; private final FixedBitSet set; - public SingleFixedSet(float[] values, int numDocs, FixedBitSet set) { - 
super(values, numDocs); + public SingleFixedSet(BigFloatArrayList values, int numDocs, FixedBitSet set) { + super(numDocs); + this.values = values; this.set = set; } @@ -186,7 +189,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { @Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT) + (set.getBits().length * RamUsage.NUM_BYTES_LONG); + size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + (set.getBits().length * RamUsage.NUM_BYTES_LONG); } return size; } @@ -204,10 +207,10 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { static class LongValues extends org.elasticsearch.index.fielddata.LongValues { - private final float[] values; + private final BigFloatArrayList values; private final FixedBitSet set; - LongValues(float[] values, FixedBitSet set) { + LongValues(BigFloatArrayList values, FixedBitSet set) { super(false); this.values = values; this.set = set; @@ -220,16 +223,16 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { @Override public long getValue(int docId) { - return (long) values[docId]; + return (long) values.get(docId); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues { - private final float[] values; + private final BigFloatArrayList values; private final FixedBitSet set; - DoubleValues(float[] values, FixedBitSet set) { + DoubleValues(BigFloatArrayList values, FixedBitSet set) { super(false); this.values = values; this.set = set; @@ -242,7 +245,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { @Override public double getValue(int docId) { - return (double) values[docId]; + return (double) values.get(docId); } } @@ -254,12 +257,15 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { */ public static class Single extends 
FloatArrayAtomicFieldData { + private final BigFloatArrayList values; + /** * Note, here, we assume that there is no offset by 1 from docId, so position 0 * is the value for docId 0. */ - public Single(float[] values, int numDocs) { - super(values, numDocs); + public Single(BigFloatArrayList values, int numDocs) { + super(numDocs); + this.values = values; } @Override @@ -275,7 +281,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { @Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT); + size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes(); } return size; } @@ -293,32 +299,32 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense { - private final float[] values; + private final BigFloatArrayList values; - LongValues(float[] values) { + LongValues(BigFloatArrayList values) { super(false); this.values = values; } @Override public long getValue(int docId) { - return (long) values[docId]; + return (long) values.get(docId); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense { - private final float[] values; + private final BigFloatArrayList values; - DoubleValues(float[] values) { + DoubleValues(BigFloatArrayList values) { super(false); this.values = values; } @Override public double getValue(int docId) { - return (double) values[docId]; + return (double) values.get(docId); } } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayIndexFieldData.java index 7c71d0897fb..80c1d1ba51e 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayIndexFieldData.java @@ -19,7 
+19,6 @@ package org.elasticsearch.index.fielddata.plain; -import gnu.trove.list.array.TFloatArrayList; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Terms; @@ -27,11 +26,11 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.BigFloatArrayList; import org.elasticsearch.index.Index; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource; @@ -49,7 +48,7 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) { return new FloatArrayIndexFieldData(index, indexSettings, fieldNames, type, cache); } } @@ -91,12 +90,12 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) { return new GeoPointDoubleArrayIndexFieldData(index, indexSettings, fieldNames, type, cache); } } @@ -83,12 +82,12 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData 0) { sValues.set(i, values.get(ord - 1) - minValue); } diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java index 00dc50eda5b..16b7c37b61e 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java +++ 
b/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java @@ -23,6 +23,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.PagedBytes.Reader; import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; +import org.elasticsearch.common.util.BigIntArray; import org.elasticsearch.index.fielddata.AtomicFieldData; import org.elasticsearch.index.fielddata.ScriptDocValues; import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals; @@ -42,7 +43,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals termCountHardLimit) { - // app is misusing the API (there is more than - // one term per doc); in this case we make best - // effort to load what we can (see LUCENE-2142) - numUniqueTerms = termCountHardLimit; - } - } - final MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer(); termOrdToBytesOffset.add(0); // first ord is reserved for missing values - boolean preDefineBitsRequired = regex == null && frequency == null; - final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT); - OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio); + final long numTerms; + if (regex == null && frequency == null) { + numTerms = terms.size(); + } else { + numTerms = -1; + } + final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); + OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio); try { // 0 is reserved for "unset" bytes.copyUsingLengthPrefix(new BytesRef()); TermsEnum termsEnum = filter(terms, reader); DocsEnum docsEnum = null; for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { - final int 
termOrd = builder.nextOrdinal(); + final long termOrd = builder.nextOrdinal(); assert termOrd == termOrdToBytesOffset.size(); termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term)); docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetExecutor.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetExecutor.java index a157e114f8c..72d5e09e1a3 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetExecutor.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetExecutor.java @@ -27,6 +27,8 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.cache.recycler.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.util.IntArray; +import org.elasticsearch.common.util.IntArrays; import org.elasticsearch.index.fielddata.BytesValues; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.ordinals.Ordinals; @@ -114,7 +116,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it... (can we get around this?) 
int count = 0; do { - count += agg.counts[agg.position]; + count += agg.counts.get(agg.position); if (agg.nextPosition()) { agg = queue.updateTop(); } else { @@ -144,12 +146,6 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { list[i] = (InternalStringTermsFacet.TermEntry) ordered.pop(); } - for (ReaderAggregator aggregator : aggregators) { - if (aggregator.counts.length > ordinalsCacheAbove) { - cacheRecycler.pushIntArray(aggregator.counts); - } - } - return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total); } @@ -160,7 +156,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it... (can we work around that?) int count = 0; do { - count += agg.counts[agg.position]; + count += agg.counts.get(agg.position); if (agg.nextPosition()) { agg = queue.updateTop(); } else { @@ -186,13 +182,6 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { } } - - for (ReaderAggregator aggregator : aggregators) { - if (aggregator.counts.length > ordinalsCacheAbove) { - cacheRecycler.pushIntArray(aggregator.counts); - } - } - return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing, total); } @@ -207,8 +196,8 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { @Override public void setNextReader(AtomicReaderContext context) throws IOException { if (current != null) { - missing += current.counts[0]; - total += current.total - current.counts[0]; + missing += current.counts.get(0); + total += current.total - current.counts.get(0); if (current.values.ordinals().getNumOrds() > 0) { aggregators.add(current); } @@ -221,7 +210,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { @Override public void collect(int doc) throws IOException { Iter iter = ordinals.getIter(doc); - int ord = iter.next(); + long ord = 
iter.next(); current.onOrdinal(doc, ord); while ((ord = iter.next()) != 0) { current.onOrdinal(doc, ord); @@ -231,8 +220,8 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { @Override public void postCollection() { if (current != null) { - missing += current.counts[0]; - total += current.total - current.counts[0]; + missing += current.counts.get(0); + total += current.total - current.counts.get(0); // if we have values for this one, add it if (current.values.ordinals().getNumOrds() > 0) { aggregators.add(current); @@ -247,26 +236,21 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { public static final class ReaderAggregator { final BytesValues.WithOrdinals values; - final int[] counts; + final IntArray counts; - int position = 0; + long position = 0; BytesRef current; int total; - private final int maxOrd; + private final long maxOrd; public ReaderAggregator(BytesValues.WithOrdinals values, int ordinalsCacheLimit, CacheRecycler cacheRecycler) { this.values = values; this.maxOrd = values.ordinals().getMaxOrd(); - - if (maxOrd > ordinalsCacheLimit) { - this.counts = cacheRecycler.popIntArray(maxOrd); - } else { - this.counts = new int[maxOrd]; - } + this.counts = IntArrays.allocate(maxOrd); } - final void onOrdinal(int docId, int ordinal) { - counts[ordinal]++; + final void onOrdinal(int docId, long ordinal) { + counts.increment(ordinal, 1); total++; } diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/FilterFieldDataTest.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/FilterFieldDataTest.java index 644eec3cb01..72f65634b6c 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/fielddata/FilterFieldDataTest.java +++ b/src/test/java/org/elasticsearch/test/unit/index/fielddata/FilterFieldDataTest.java @@ -82,7 +82,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); 
BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(2, equalTo(ordinals.getNumOrds())); + assertThat(2L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10")); assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100")); @@ -95,7 +95,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(1, equalTo(ordinals.getNumOrds())); + assertThat(1L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("5")); } @@ -108,7 +108,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(2, equalTo(ordinals.getNumOrds())); + assertThat(2L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10")); assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100")); @@ -122,7 +122,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(2, equalTo(ordinals.getNumOrds())); + assertThat(2L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10")); 
assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100")); @@ -139,7 +139,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(1, equalTo(ordinals.getNumOrds())); + assertThat(1L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("100")); } @@ -184,7 +184,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(1, equalTo(ordinals.getNumOrds())); + assertThat(1L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("5")); } @@ -196,7 +196,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(2, equalTo(ordinals.getNumOrds())); + assertThat(2L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10")); assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("5")); diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/FlatMultiOrdinalsTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/FlatMultiOrdinalsTests.java deleted file mode 100644 index 840e4b8c6d1..00000000000 --- 
a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/FlatMultiOrdinalsTests.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.test.unit.index.fielddata.ordinals; - -import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.index.fielddata.ordinals.Ordinals; -import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder; - -/** - */ -public class FlatMultiOrdinalsTests extends MultiOrdinalsTests { - - @Override - protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) { - settings.put("multi_ordinals", "flat"); - return builder.build(settings.build()); - } -} diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/MultiOrdinalsTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/MultiOrdinalsTests.java index 880386d8822..576b61ba34c 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/MultiOrdinalsTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/MultiOrdinalsTests.java @@ -19,8 +19,9 @@ package 
org.elasticsearch.test.unit.index.fielddata.ordinals; -import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LongsRef; import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.index.fielddata.ordinals.MultiOrdinals; import org.elasticsearch.index.fielddata.ordinals.Ordinals; import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder; import org.testng.annotations.Test; @@ -30,18 +31,20 @@ import java.util.*; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; /** */ -public abstract class MultiOrdinalsTests { +public class MultiOrdinalsTests { protected final Ordinals creationMultiOrdinals(OrdinalsBuilder builder) { return this.creationMultiOrdinals(builder, ImmutableSettings.builder()); } - protected abstract Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings); + protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) { + return builder.build(settings.build()); + } + @Test public void testRandomValues() throws IOException { @@ -74,7 +77,7 @@ public abstract class MultiOrdinalsTests { return 1; } }); - int lastOrd = -1; + long lastOrd = -1; for (OrdAndId ordAndId : ordsAndIds) { if (lastOrd != ordAndId.ord) { lastOrd = ordAndId.ord; @@ -105,27 +108,27 @@ public abstract class MultiOrdinalsTests { Ordinals ords = creationMultiOrdinals(builder); Ordinals.Docs docs = ords.ordinals(); int docId = ordsAndIds.get(0).id; - List docOrds = new ArrayList(); + List docOrds = new ArrayList(); for (OrdAndId ordAndId : ordsAndIds) { if (docId == ordAndId.id) { docOrds.add(ordAndId.ord); } else { if (!docOrds.isEmpty()) { assertThat(docs.getOrd(docId), equalTo(docOrds.get(0))); - IntsRef ref = docs.getOrds(docId); + LongsRef ref = docs.getOrds(docId); assertThat(ref.offset, equalTo(0)); for (int i = ref.offset; i < ref.length; i++) { - 
assertThat(ref.ints[i], equalTo(docOrds.get(i))); + assertThat(ref.longs[i], equalTo(docOrds.get(i))); } - final int[] array = new int[docOrds.size()]; + final long[] array = new long[docOrds.size()]; for (int i = 0; i < array.length; i++) { array[i] = docOrds.get(i); } assertIter(docs.getIter(docId), array); } for (int i = docId + 1; i < ordAndId.id; i++) { - assertThat(docs.getOrd(i), equalTo(0)); + assertThat(docs.getOrd(i), equalTo(0L)); } docId = ordAndId.id; docOrds.clear(); @@ -137,10 +140,10 @@ public abstract class MultiOrdinalsTests { } public static class OrdAndId { - final int ord; + final long ord; final int id; - public OrdAndId(int ord, int id) { + public OrdAndId(long ord, int id) { this.ord = ord; this.id = id; } @@ -150,7 +153,7 @@ public abstract class MultiOrdinalsTests { final int prime = 31; int result = 1; result = prime * result + id; - result = prime * result + ord; + result = prime * result + (int) ord; return result; } @@ -174,7 +177,7 @@ public abstract class MultiOrdinalsTests { @Test public void testOrdinals() throws Exception { int maxDoc = 7; - int maxOrds = 32; + long maxOrds = 32; OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc); builder.nextOrdinal(); // 1 builder.addDoc(1).addDoc(4).addDoc(5).addDoc(6); @@ -186,97 +189,99 @@ public abstract class MultiOrdinalsTests { builder.addDoc(0).addDoc(4).addDoc(5).addDoc(6); builder.nextOrdinal(); // 5 builder.addDoc(4).addDoc(5).addDoc(6); - int ord = builder.nextOrdinal(); // 6 + long ord = builder.nextOrdinal(); // 6 builder.addDoc(4).addDoc(5).addDoc(6); - for (int i = ord; i < maxOrds; i++) { + for (long i = ord; i < maxOrds; i++) { builder.nextOrdinal(); builder.addDoc(5).addDoc(6); } - + + long[][] ordinalPlan = new long[][] { + {2, 4}, + {1}, + {3}, + {}, + {1, 3, 4, 5, 6}, + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}, + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32} + }; Ordinals ordinals 
= creationMultiOrdinals(builder); Ordinals.Docs docs = ordinals.ordinals(); - assertThat(docs.getNumDocs(), equalTo(maxDoc)); - assertThat(docs.getNumOrds(), equalTo(maxOrds)); - assertThat(docs.getMaxOrd(), equalTo(maxOrds + 1)); // Includes null ord - assertThat(docs.isMultiValued(), equalTo(true)); - assertThat(ordinals.getMemorySizeInBytes(), greaterThan(0l)); - - // Document 1 - assertThat(docs.getOrd(0), equalTo(2)); - IntsRef ref = docs.getOrds(0); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(2)); - assertThat(ref.ints[1], equalTo(4)); - assertThat(ref.length, equalTo(2)); - assertIter(docs.getIter(0), 2, 4); - - // Document 2 - assertThat(docs.getOrd(1), equalTo(1)); - ref = docs.getOrds(1); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(1)); - assertThat(ref.length, equalTo(1)); - assertIter(docs.getIter(1), 1); - - // Document 3 - assertThat(docs.getOrd(2), equalTo(3)); - ref = docs.getOrds(2); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(3)); - assertThat(ref.length, equalTo(1)); - assertIter(docs.getIter(2), 3); - - // Document 4 - assertThat(docs.getOrd(3), equalTo(0)); - ref = docs.getOrds(3); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.length, equalTo(0)); - assertIter(docs.getIter(3)); - - // Document 5 - assertThat(docs.getOrd(4), equalTo(1)); - ref = docs.getOrds(4); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(1)); - assertThat(ref.ints[1], equalTo(3)); - assertThat(ref.ints[2], equalTo(4)); - assertThat(ref.ints[3], equalTo(5)); - assertThat(ref.ints[4], equalTo(6)); - assertThat(ref.length, equalTo(5)); - assertIter(docs.getIter(4), 1, 3, 4, 5, 6); - - // Document 6 - assertThat(docs.getOrd(5), equalTo(1)); - ref = docs.getOrds(5); - assertThat(ref.offset, equalTo(0)); - int[] expectedOrds = new int[maxOrds]; - for (int i = 0; i < maxOrds; i++) { - expectedOrds[i] = i + 1; - assertThat(ref.ints[i], equalTo(i + 1)); - } - 
assertIter(docs.getIter(5), expectedOrds); - assertThat(ref.length, equalTo(maxOrds)); - - // Document 7 - assertThat(docs.getOrd(6), equalTo(1)); - ref = docs.getOrds(6); - assertThat(ref.offset, equalTo(0)); - expectedOrds = new int[maxOrds]; - for (int i = 0; i < maxOrds; i++) { - expectedOrds[i] = i + 1; - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertIter(docs.getIter(6), expectedOrds); - assertThat(ref.length, equalTo(maxOrds)); + assertEquals(docs, ordinalPlan); } - protected static void assertIter(Ordinals.Docs.Iter iter, int... expectedOrdinals) { - for (int expectedOrdinal : expectedOrdinals) { + protected static void assertIter(Ordinals.Docs.Iter iter, long... expectedOrdinals) { + for (long expectedOrdinal : expectedOrdinals) { assertThat(iter.next(), equalTo(expectedOrdinal)); } - assertThat(iter.next(), equalTo(0)); // Last one should always be 0 - assertThat(iter.next(), equalTo(0)); // Just checking it stays 0 + assertThat(iter.next(), equalTo(0L)); // Last one should always be 0 + assertThat(iter.next(), equalTo(0L)); // Just checking it stays 0 + } + + @Test + public void testMultiValuesDocsWithOverlappingStorageArrays() throws Exception { + int maxDoc = 7; + long maxOrds = 15; + OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc); + for (int i = 0; i < maxOrds; i++) { + builder.nextOrdinal(); + if (i < 10) { + builder.addDoc(0); + } + builder.addDoc(1); + if (i == 0) { + builder.addDoc(2); + } + if (i < 5) { + builder.addDoc(3); + + } + if (i < 6) { + builder.addDoc(4); + + } + if (i == 1) { + builder.addDoc(5); + } + if (i < 10) { + builder.addDoc(6); + } + } + + long[][] ordinalPlan = new long[][] { + {1,2,3,4,5,6,7,8,9,10}, + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}, + {1}, + {1,2,3,4,5}, + {1,2,3,4,5,6}, + {2}, + {1,2,3,4,5,6,7,8,9,10} + }; + + Ordinals ordinals = new MultiOrdinals(builder); + Ordinals.Docs docs = ordinals.ordinals(); + assertEquals(docs, ordinalPlan); + } + + private void assertEquals(Ordinals.Docs docs, long[][] 
ordinalPlan) { + long numOrds = 0; + for (int doc = 0; doc < ordinalPlan.length; ++doc) { + if (ordinalPlan[doc].length > 0) { + numOrds = Math.max(numOrds, ordinalPlan[doc][ordinalPlan[doc].length - 1]); + } + } + assertThat(docs.getNumDocs(), equalTo(ordinalPlan.length)); + assertThat(docs.getNumOrds(), equalTo(numOrds)); + assertThat(docs.getMaxOrd(), equalTo(numOrds + 1)); // Includes null ord + assertThat(docs.isMultiValued(), equalTo(true)); + for (int doc = 0; doc < ordinalPlan.length; ++doc) { + LongsRef ref = docs.getOrds(doc); + assertThat(ref.offset, equalTo(0)); + long[] ords = ordinalPlan[doc]; + assertThat(ref, equalTo(new LongsRef(ords, 0, ords.length))); + assertIter(docs.getIter(doc), ords); + } } } diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SingleOrdinalsTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SingleOrdinalsTests.java index 07fb09a38d3..69262ea0ea1 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SingleOrdinalsTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SingleOrdinalsTests.java @@ -40,9 +40,9 @@ public class SingleOrdinalsTests { public void testSvValues() throws IOException { int numDocs = 1000000; int numOrdinals = numDocs / 4; - Map controlDocToOrdinal = new HashMap(); + Map controlDocToOrdinal = new HashMap(); OrdinalsBuilder builder = new OrdinalsBuilder(numDocs); - int ordinal = builder.nextOrdinal(); + long ordinal = builder.nextOrdinal(); for (int doc = 0; doc < numDocs; doc++) { if (doc % numOrdinals == 0) { ordinal = builder.nextOrdinal(); @@ -56,7 +56,7 @@ public class SingleOrdinalsTests { Ordinals.Docs docs = ords.ordinals(); assertThat(controlDocToOrdinal.size(), equalTo(docs.getNumDocs())); - for (Map.Entry entry : controlDocToOrdinal.entrySet()) { + for (Map.Entry entry : controlDocToOrdinal.entrySet()) { assertThat(entry.getValue(), equalTo(docs.getOrd(entry.getKey()))); } diff --git
a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SparseMultiOrdinalsTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SparseMultiOrdinalsTests.java deleted file mode 100644 index a56c38b37c7..00000000000 --- a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SparseMultiOrdinalsTests.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.test.unit.index.fielddata.ordinals; - -import org.apache.lucene.util.IntsRef; -import org.elasticsearch.ElasticSearchException; -import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.common.settings.ImmutableSettings.Builder; -import org.elasticsearch.index.fielddata.ordinals.Ordinals; -import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder; -import org.elasticsearch.index.fielddata.ordinals.SparseMultiArrayOrdinals; -import org.testng.annotations.Test; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; -import static org.testng.Assert.fail; - -/** - */ -public class SparseMultiOrdinalsTests extends MultiOrdinalsTests { - - @Override - protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) { - settings.put("multi_ordinals", "sparse"); - return builder.build(settings.build()); - } - - @Test - public void testMultiValuesSurpassOrdinalsLimit() throws Exception { - OrdinalsBuilder builder = new OrdinalsBuilder(2); - int maxOrds = 128; - for (int i = 0; i < maxOrds; i++) { - builder.nextOrdinal(); - if (i == 2 || i == 4) { - builder.addDoc(0); - } - builder.addDoc(1); - - } - - try { - Builder builder2 = ImmutableSettings.builder(); - builder2.put("multi_ordinals_max_docs", 64); - creationMultiOrdinals(builder, builder2); - fail("Exception should have been throwed"); - } catch (ElasticSearchException e) { - - } - } - - @Test - public void testMultiValuesDocsWithOverlappingStorageArrays() throws Exception { - int maxDoc = 7; - int maxOrds = 15; - OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc); - for (int i = 0; i < maxOrds; i++) { - builder.nextOrdinal(); - if (i < 10) { - builder.addDoc(0); - } - builder.addDoc(1); - if (i == 0) { - builder.addDoc(2); - } - if (i < 5) { - builder.addDoc(3); - - } - if (i < 6) { - builder.addDoc(4); - - } - if (i == 1) { - builder.addDoc(5); - } - if (i < 10) 
{ - builder.addDoc(6); - } - } - - Ordinals ordinals = new SparseMultiArrayOrdinals(builder, 64); - Ordinals.Docs docs = ordinals.ordinals(); - assertThat(docs.getNumDocs(), equalTo(maxDoc)); - assertThat(docs.getNumOrds(), equalTo(maxOrds)); // Includes null ord - assertThat(docs.isMultiValued(), equalTo(true)); - - // Document 1 - assertThat(docs.getOrd(0), equalTo(1)); - IntsRef ref = docs.getOrds(0); - assertThat(ref.offset, equalTo(0)); - for (int i = 0; i < 10; i++) { - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertThat(ref.length, equalTo(10)); - - // Document 2 - assertThat(docs.getOrd(1), equalTo(1)); - ref = docs.getOrds(1); - assertThat(ref.offset, equalTo(0)); - for (int i = 0; i < 15; i++) { - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertThat(ref.length, equalTo(15)); - - // Document 3 - assertThat(docs.getOrd(2), equalTo(1)); - ref = docs.getOrds(2); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(1)); - assertThat(ref.length, equalTo(1)); - - // Document 4 - assertThat(docs.getOrd(3), equalTo(1)); - ref = docs.getOrds(3); - assertThat(ref.offset, equalTo(0)); - for (int i = 0; i < 5; i++) { - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertThat(ref.length, equalTo(5)); - - // Document 5 - assertThat(docs.getOrd(4), equalTo(1)); - ref = docs.getOrds(4); - assertThat(ref.offset, equalTo(0)); - for (int i = 0; i < 6; i++) { - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertThat(ref.length, equalTo(6)); - - // Document 6 - assertThat(docs.getOrd(5), equalTo(2)); - ref = docs.getOrds(5); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(2)); - assertThat(ref.length, equalTo(1)); - - // Document 7 - assertThat(docs.getOrd(6), equalTo(1)); - ref = docs.getOrds(6); - assertThat(ref.offset, equalTo(0)); - for (int i = 0; i < 10; i++) { - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertThat(ref.length, equalTo(10)); - } - -}