diff --git a/src/main/java/org/apache/lucene/util/packed/XAbstractPagedMutable.java b/src/main/java/org/apache/lucene/util/packed/XAbstractPagedMutable.java new file mode 100644 index 00000000000..f1b3679a365 --- /dev/null +++ b/src/main/java/org/apache/lucene/util/packed/XAbstractPagedMutable.java @@ -0,0 +1,171 @@ +package org.apache.lucene.util.packed; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import static org.apache.lucene.util.packed.XPackedInts.checkBlockSize; +import static org.apache.lucene.util.packed.XPackedInts.numBlocks; + +import org.apache.lucene.util.Version; +import org.elasticsearch.common.lucene.Lucene; + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * Base implementation for {@link XPagedMutable} and {@link PagedGrowableWriter}. + * @lucene.internal + */ +abstract class XAbstractPagedMutable<T extends XAbstractPagedMutable<T>> { + + static { + // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640. 
+ assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed"; + } + + static final int MIN_BLOCK_SIZE = 1 << 6; + static final int MAX_BLOCK_SIZE = 1 << 30; + + final long size; + final int pageShift; + final int pageMask; + final PackedInts.Mutable[] subMutables; + final int bitsPerValue; + + XAbstractPagedMutable(int bitsPerValue, long size, int pageSize) { + this.bitsPerValue = bitsPerValue; + this.size = size; + pageShift = checkBlockSize(pageSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE); + pageMask = pageSize - 1; + final int numPages = numBlocks(size, pageSize); + subMutables = new PackedInts.Mutable[numPages]; + } + + protected final void fillPages() { + final int numPages = numBlocks(size, pageSize()); + for (int i = 0; i < numPages; ++i) { + // do not allocate for more entries than necessary on the last page + final int valueCount = i == numPages - 1 ? lastPageSize(size) : pageSize(); + subMutables[i] = newMutable(valueCount, bitsPerValue); + } + } + + protected abstract PackedInts.Mutable newMutable(int valueCount, int bitsPerValue); + + final int lastPageSize(long size) { + final int sz = indexInPage(size); + return sz == 0 ? pageSize() : sz; + } + + final int pageSize() { + return pageMask + 1; + } + + /** The number of values. */ + public final long size() { + return size; + } + + final int pageIndex(long index) { + return (int) (index >>> pageShift); + } + + final int indexInPage(long index) { + return (int) index & pageMask; + } + + /** Get value at index. */ + public final long get(long index) { + assert index >= 0 && index < size; + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + return subMutables[pageIndex].get(indexInPage); + } + + /** Set value at index. 
*/ + public final void set(long index, long value) { + assert index >= 0 && index < size; + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + subMutables[pageIndex].set(indexInPage, value); + } + + protected long baseRamBytesUsed() { + return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + + RamUsageEstimator.NUM_BYTES_OBJECT_REF + + RamUsageEstimator.NUM_BYTES_LONG + + 3 * RamUsageEstimator.NUM_BYTES_INT; + } + + /** Return the number of bytes used by this object. */ + public long ramBytesUsed() { + long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed()); + bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length); + for (PackedInts.Mutable gw : subMutables) { + bytesUsed += gw.ramBytesUsed(); + } + return bytesUsed; + } + + protected abstract T newUnfilledCopy(long newSize); + + /** Create a new copy of size newSize based on the content of + * this buffer. This method is much more efficient than creating a new + * instance and copying values one by one. */ + public final T resize(long newSize) { + final T copy = newUnfilledCopy(newSize); + final int numCommonPages = Math.min(copy.subMutables.length, subMutables.length); + final long[] copyBuffer = new long[1024]; + for (int i = 0; i < copy.subMutables.length; ++i) { + final int valueCount = i == copy.subMutables.length - 1 ? lastPageSize(newSize) : pageSize(); + final int bpv = i < numCommonPages ? subMutables[i].getBitsPerValue() : this.bitsPerValue; + copy.subMutables[i] = newMutable(valueCount, bpv); + if (i < numCommonPages) { + final int copyLength = Math.min(valueCount, subMutables[i].size()); + XPackedInts.copy(subMutables[i], 0, copy.subMutables[i], 0, copyLength, copyBuffer); + } + } + return copy; + } + + /** Similar to {@link ArrayUtil#grow(long[], int)}. 
*/ + public final T grow(long minSize) { + assert minSize >= 0; + if (minSize <= size()) { + @SuppressWarnings("unchecked") + final T result = (T) this; + return result; + } + long extra = minSize >>> 3; + if (extra < 3) { + extra = 3; + } + final long newSize = minSize + extra; + return resize(newSize); + } + + /** Similar to {@link ArrayUtil#grow(long[])}. */ + public final T grow() { + return grow(size() + 1); + } + + @Override + public final String toString() { + return getClass().getSimpleName() + "(size=" + size() + ",pageSize=" + pageSize() + ")"; + } + +} diff --git a/src/main/java/org/apache/lucene/util/packed/XGrowableWriter.java b/src/main/java/org/apache/lucene/util/packed/XGrowableWriter.java new file mode 100644 index 00000000000..c36eea376f5 --- /dev/null +++ b/src/main/java/org/apache/lucene/util/packed/XGrowableWriter.java @@ -0,0 +1,162 @@ +package org.apache.lucene.util.packed; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.Version; +import org.elasticsearch.common.lucene.Lucene; + +import java.io.IOException; + +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.RamUsageEstimator; + +/** + * Implements {@link XPackedInts.Mutable}, but grows the + * bit count of the underlying packed ints on-demand. + *

Beware that this class will accept negative values to be set, but in order + * to do so, it will grow the number of bits per value to 64. + * + *

@lucene.internal

+ */ +public class XGrowableWriter implements PackedInts.Mutable { + + static { + // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640. + assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed"; + } + + private long currentMask; + private PackedInts.Mutable current; + private final float acceptableOverheadRatio; + + /** + * @param startBitsPerValue the initial number of bits per value, may grow depending on the data + * @param valueCount the number of values + * @param acceptableOverheadRatio an acceptable overhead ratio + */ + public XGrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) { + this.acceptableOverheadRatio = acceptableOverheadRatio; + current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio); + currentMask = mask(current.getBitsPerValue()); + } + + private static long mask(int bitsPerValue) { + return bitsPerValue == 64 ? ~0L : PackedInts.maxValue(bitsPerValue); + } + + @Override + public long get(int index) { + return current.get(index); + } + + @Override + public int size() { + return current.size(); + } + + @Override + public int getBitsPerValue() { + return current.getBitsPerValue(); + } + + public PackedInts.Mutable getMutable() { + return current; + } + + @Override + public Object getArray() { + return current.getArray(); + } + + @Override + public boolean hasArray() { + return current.hasArray(); + } + + private void ensureCapacity(long value) { + if ((value & currentMask) == value) { + return; + } + final int bitsRequired = value < 0 ? 
64 : PackedInts.bitsRequired(value); + assert bitsRequired > current.getBitsPerValue(); + final int valueCount = size(); + PackedInts.Mutable next = PackedInts.getMutable(valueCount, bitsRequired, acceptableOverheadRatio); + PackedInts.copy(current, 0, next, 0, valueCount, PackedInts.DEFAULT_BUFFER_SIZE); + current = next; + currentMask = mask(current.getBitsPerValue()); + } + + @Override + public void set(int index, long value) { + ensureCapacity(value); + current.set(index, value); + } + + @Override + public void clear() { + current.clear(); + } + + public XGrowableWriter resize(int newSize) { + XGrowableWriter next = new XGrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio); + final int limit = Math.min(size(), newSize); + PackedInts.copy(current, 0, next, 0, limit, PackedInts.DEFAULT_BUFFER_SIZE); + return next; + } + + @Override + public int get(int index, long[] arr, int off, int len) { + return current.get(index, arr, off, len); + } + + @Override + public int set(int index, long[] arr, int off, int len) { + long max = 0; + for (int i = off, end = off + len; i < end; ++i) { + // bitwise or is nice because either all values are positive and the + // or-ed result will require as many bits per value as the max of the + // values, or one of them is negative and the result will be negative, + // forcing GrowableWriter to use 64 bits per value + max |= arr[i]; + } + ensureCapacity(max); + return current.set(index, arr, off, len); + } + + @Override + public void fill(int fromIndex, int toIndex, long val) { + ensureCapacity(val); + current.fill(fromIndex, toIndex, val); + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.alignObjectSize( + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + + RamUsageEstimator.NUM_BYTES_OBJECT_REF + + RamUsageEstimator.NUM_BYTES_LONG + + RamUsageEstimator.NUM_BYTES_FLOAT) + + current.ramBytesUsed(); + } + + @Override + public void save(DataOutput out) throws IOException { + current.save(out); + } + +} 
diff --git a/src/main/java/org/apache/lucene/util/packed/XPackedInts.java b/src/main/java/org/apache/lucene/util/packed/XPackedInts.java new file mode 100644 index 00000000000..9a6c733c630 --- /dev/null +++ b/src/main/java/org/apache/lucene/util/packed/XPackedInts.java @@ -0,0 +1,88 @@ +package org.apache.lucene.util.packed; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.Version; +import org.apache.lucene.util.packed.PackedInts.Mutable; +import org.apache.lucene.util.packed.PackedInts.Reader; +import org.elasticsearch.common.lucene.Lucene; + +/** + * Simplistic compression for array of unsigned long values. + * Each value is >= 0 and <= a specified maximum value. The + * values are stored as packed ints, with each value + * consuming a fixed number of bits. + * + * @lucene.internal + */ +public class XPackedInts { + + static { + // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640. + assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed"; + } + + /** Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer. 
*/ + static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) { + assert buf.length > 0; + int remaining = 0; + while (len > 0) { + final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining)); + assert read > 0; + srcPos += read; + len -= read; + remaining += read; + final int written = dest.set(destPos, buf, 0, remaining); + assert written > 0; + destPos += written; + if (written < remaining) { + System.arraycopy(buf, written, buf, 0, remaining - written); + } + remaining -= written; + } + while (remaining > 0) { + final int written = dest.set(destPos, buf, 0, remaining); + destPos += written; + remaining -= written; + System.arraycopy(buf, written, buf, 0, remaining); + } + } + + /** Check that the block size is a power of 2, in the right bounds, and return + * its log in base 2. */ + static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) { + if (blockSize < minBlockSize || blockSize > maxBlockSize) { + throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize); + } + if ((blockSize & (blockSize - 1)) != 0) { + throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize); + } + return Integer.numberOfTrailingZeros(blockSize); + } + + /** Return the number of blocks required to store size values on + * blockSize. */ + static int numBlocks(long size, int blockSize) { + final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 
0 : 1); + if ((long) numBlocks * blockSize < size) { + throw new IllegalArgumentException("size is too large for this block size"); + } + return numBlocks; + } + +} diff --git a/src/main/java/org/apache/lucene/util/packed/XPagedGrowableWriter.java b/src/main/java/org/apache/lucene/util/packed/XPagedGrowableWriter.java new file mode 100644 index 00000000000..e339de17565 --- /dev/null +++ b/src/main/java/org/apache/lucene/util/packed/XPagedGrowableWriter.java @@ -0,0 +1,79 @@ +package org.apache.lucene.util.packed; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.Version; +import org.elasticsearch.common.lucene.Lucene; + +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.PackedInts.Mutable; + +/** + * A {@link XPagedGrowableWriter}. This class slices data into fixed-size blocks + * which have independent numbers of bits per value and grow on-demand. + *

You should use this class instead of {@link AppendingLongBuffer} only when + * you need random write-access. Otherwise this class will likely be slower and + * less memory-efficient. + * @lucene.internal + */ +public final class XPagedGrowableWriter extends XAbstractPagedMutable { + + static { + // LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640. + assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed"; + } + + final float acceptableOverheadRatio; + + /** + * Create a new {@link XPagedGrowableWriter} instance. + * + * @param size the number of values to store. + * @param pageSize the number of values per page + * @param startBitsPerValue the initial number of bits per value + * @param acceptableOverheadRatio an acceptable overhead ratio + */ + public XPagedGrowableWriter(long size, int pageSize, + int startBitsPerValue, float acceptableOverheadRatio) { + this(size, pageSize, startBitsPerValue, acceptableOverheadRatio, true); + } + + XPagedGrowableWriter(long size, int pageSize,int startBitsPerValue, float acceptableOverheadRatio, boolean fillPages) { + super(startBitsPerValue, size, pageSize); + this.acceptableOverheadRatio = acceptableOverheadRatio; + if (fillPages) { + fillPages(); + } + } + + @Override + protected Mutable newMutable(int valueCount, int bitsPerValue) { + return new XGrowableWriter(bitsPerValue, valueCount, acceptableOverheadRatio); + } + + @Override + protected XPagedGrowableWriter newUnfilledCopy(long newSize) { + return new XPagedGrowableWriter(newSize, pageSize(), bitsPerValue, acceptableOverheadRatio, false); + } + + @Override + protected long baseRamBytesUsed() { + return super.baseRamBytesUsed() + RamUsageEstimator.NUM_BYTES_FLOAT; + } + +} diff --git a/src/main/java/org/elasticsearch/common/util/AbstractBigArray.java b/src/main/java/org/elasticsearch/common/util/AbstractBigArray.java new file mode 100644 index 
00000000000..3cb5267e983 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/AbstractBigArray.java @@ -0,0 +1,68 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +import com.google.common.base.Preconditions; + +/** Common implementation for array lists that slice data into fixed-size blocks. 
*/ +abstract class AbstractBigArray { + + private final int pageShift; + private final int pageMask; + protected long size; + + protected AbstractBigArray(int pageSize) { + Preconditions.checkArgument(pageSize >= 128, "pageSize must be >= 128"); + Preconditions.checkArgument((pageSize & (pageSize - 1)) == 0, "pageSize must be a power of two"); + this.pageShift = Integer.numberOfTrailingZeros(pageSize); + this.pageMask = pageSize - 1; + size = 0; + } + + final int numPages(long capacity) { + final long numPages = (capacity + pageMask) >>> pageShift; + Preconditions.checkArgument(numPages <= Integer.MAX_VALUE, "pageSize=" + (pageMask + 1) + " is too small for such as capacity: " + capacity); + return (int) numPages; + } + + final int pageSize() { + return pageMask + 1; + } + + final int pageIndex(long index) { + return (int) (index >>> pageShift); + } + + final int indexInPage(long index) { + return (int) (index & pageMask); + } + + public final long size() { + return size; + } + + protected abstract int numBytesPerElement(); + + public final long sizeInBytes() { + // rough approximate, we only take into account the size of the values, not the overhead of the array objects + return ((long) pageIndex(size - 1) + 1) * pageSize() * numBytesPerElement(); + } + +} diff --git a/src/main/java/org/elasticsearch/common/util/BigDoubleArrayList.java b/src/main/java/org/elasticsearch/common/util/BigDoubleArrayList.java new file mode 100644 index 00000000000..6f213588597 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/BigDoubleArrayList.java @@ -0,0 +1,75 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +import org.apache.lucene.util.ArrayUtil; +import org.elasticsearch.common.RamUsage; + +import java.util.Arrays; + +/** Double array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of + * configurable length. */ +public final class BigDoubleArrayList extends AbstractBigArray { + + /** Default page size, 16KB of memory per page. */ + private static final int DEFAULT_PAGE_SIZE = 1 << 11; + + private double[][] pages; + + public BigDoubleArrayList(int pageSize, long initialCapacity) { + super(pageSize); + pages = new double[numPages(initialCapacity)][]; + } + + public BigDoubleArrayList(long initialCapacity) { + this(DEFAULT_PAGE_SIZE, initialCapacity); + } + + public BigDoubleArrayList() { + this(1024); + } + + public double get(long index) { + assert index >= 0 && index < size; + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + return pages[pageIndex][indexInPage]; + } + + public void add(double d) { + final int pageIndex = pageIndex(size); + if (pageIndex >= pages.length) { + final int newLength = ArrayUtil.oversize(pageIndex + 1, numBytesPerElement()); + pages = Arrays.copyOf(pages, newLength); + } + if (pages[pageIndex] == null) { + pages[pageIndex] = new double[pageSize()]; + } + final int indexInPage = indexInPage(size); + pages[pageIndex][indexInPage] = d; + ++size; + } + + @Override + protected int numBytesPerElement() { + return RamUsage.NUM_BYTES_DOUBLE; + } + +} diff --git 
a/src/main/java/org/elasticsearch/common/util/BigFloatArrayList.java b/src/main/java/org/elasticsearch/common/util/BigFloatArrayList.java new file mode 100644 index 00000000000..15ac6dd0a9c --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/BigFloatArrayList.java @@ -0,0 +1,70 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +import org.apache.lucene.util.ArrayUtil; +import org.elasticsearch.common.RamUsage; + +/** Float array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of + * configurable length. */ +public final class BigFloatArrayList extends AbstractBigArray { + + /** Default page size, 16KB of memory per page. 
*/ + private static final int DEFAULT_PAGE_SIZE = 1 << 12; + + private float[][] pages; + + public BigFloatArrayList(int pageSize, long initialCapacity) { + super(pageSize); + pages = new float[numPages(initialCapacity)][]; + } + + public BigFloatArrayList(long initialCapacity) { + this(DEFAULT_PAGE_SIZE, initialCapacity); + } + + public BigFloatArrayList() { + this(1024); + } + + public float get(long index) { + assert index >= 0 && index < size; + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + return pages[pageIndex][indexInPage]; + } + + public void add(float f) { + final int pageIndex = pageIndex(size); + pages = ArrayUtil.grow(pages, pageIndex + 1); + if (pages[pageIndex] == null) { + pages[pageIndex] = new float[pageSize()]; + } + final int indexInPage = indexInPage(size); + pages[pageIndex][indexInPage] = f; + ++size; + } + + @Override + protected int numBytesPerElement() { + return RamUsage.NUM_BYTES_FLOAT; + } + +} diff --git a/src/main/java/org/elasticsearch/common/util/BigIntArray.java b/src/main/java/org/elasticsearch/common/util/BigIntArray.java new file mode 100644 index 00000000000..669940bd273 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/BigIntArray.java @@ -0,0 +1,69 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +import org.elasticsearch.common.RamUsage; + +/** Int array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of + * configurable length. */ +public final class BigIntArray extends AbstractBigArray implements IntArray { + + /** Default page size, 16KB of memory per page. */ + public static final int DEFAULT_PAGE_SIZE = 1 << 12; + + private int[][] pages; + + public BigIntArray(int pageSize, long size) { + super(pageSize); + this.size = size; + pages = new int[numPages(size)][]; + for (int i = 0; i < pages.length; ++i) { + pages[i] = new int[pageSize()]; + } + } + + public BigIntArray(long size) { + this(DEFAULT_PAGE_SIZE, size); + } + + public int get(long index) { + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + return pages[pageIndex][indexInPage]; + } + + public void set(long index, int value) { + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + pages[pageIndex][indexInPage] = value; + } + + public int increment(long index, int inc) { + final int pageIndex = pageIndex(index); + final int indexInPage = indexInPage(index); + return pages[pageIndex][indexInPage] += inc; + } + + @Override + protected int numBytesPerElement() { + return RamUsage.NUM_BYTES_INT; + } + +} diff --git a/src/main/java/org/elasticsearch/common/util/IntArray.java b/src/main/java/org/elasticsearch/common/util/IntArray.java new file mode 100644 index 00000000000..234683eb048 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/IntArray.java @@ -0,0 +1,34 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +/** Abstraction of an array of integer values. */ +public interface IntArray { + + /** Get an element given its index. */ + public abstract int get(long index); + + /** Set a value at the given index. */ + public abstract void set(long index, int value); + + /** Increment value at the given index by inc and return the value. */ + public abstract int increment(long index, int inc); + +} diff --git a/src/main/java/org/elasticsearch/common/util/IntArrays.java b/src/main/java/org/elasticsearch/common/util/IntArrays.java new file mode 100644 index 00000000000..1a1f91bbee0 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/util/IntArrays.java @@ -0,0 +1,66 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +/** Utility methods to work with {@link IntArray}s. */ +public class IntArrays { + + private IntArrays() {} + + /** Return a {@link IntArray} view over the provided array. */ + public static IntArray wrap(final int[] array) { + return new IntArray() { + + private void checkIndex(long index) { + if (index > Integer.MAX_VALUE) { + throw new IndexOutOfBoundsException(Long.toString(index)); + } + } + + @Override + public void set(long index, int value) { + checkIndex(index); + array[(int) index] = value; + } + + @Override + public int increment(long index, int inc) { + checkIndex(index); + return array[(int) index] += inc; + } + + @Override + public int get(long index) { + checkIndex(index); + return array[(int) index]; + } + }; + } + + /** Return a newly allocated {@link IntArray} of the given length or more. 
*/ + public static IntArray allocate(long length) { + if (length <= BigIntArray.DEFAULT_PAGE_SIZE) { + return wrap(new int[(int) length]); + } else { + return new BigIntArray(length); + } + } + +} diff --git a/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java b/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java index 1162163059d..9e8c49fa630 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/BytesValues.java @@ -120,10 +120,10 @@ public abstract class BytesValues { public static class Single implements Iter { protected BytesRef value; - protected int ord; + protected long ord; protected boolean done; - public Single reset(BytesRef value, int ord) { + public Single reset(BytesRef value, long ord) { this.value = value; this.ord = ord; this.done = false; @@ -149,8 +149,8 @@ public abstract class BytesValues { static class Multi implements Iter { - protected int innerOrd; - protected int ord; + protected long innerOrd; + protected long ord; protected BytesValues.WithOrdinals withOrds; protected Ordinals.Docs.Iter ordsIter; protected final BytesRef scratch = new BytesRef(); @@ -226,7 +226,7 @@ public abstract class BytesValues { return ordinals; } - public BytesRef getValueByOrd(int ord) { + public BytesRef getValueByOrd(long ord) { return getValueScratchByOrd(ord, scratch); } @@ -247,7 +247,7 @@ public abstract class BytesValues { @Override public BytesRef getValue(int docId) { - final int ord = ordinals.getOrd(docId); + final long ord = ordinals.getOrd(docId); if (ord == 0) { return null; } @@ -268,7 +268,7 @@ public abstract class BytesValues { * result which will also be returned. If there is no value for this docId, the length will be 0. * Note, the bytes are not "safe". 
*/ - public abstract BytesRef getValueScratchByOrd(int ord, BytesRef ret); + public abstract BytesRef getValueScratchByOrd(long ord, BytesRef ret); public static class Empty extends WithOrdinals { @@ -277,7 +277,7 @@ public abstract class BytesValues { } @Override - public BytesRef getValueScratchByOrd(int ord, BytesRef ret) { + public BytesRef getValueScratchByOrd(long ord, BytesRef ret) { ret.length = 0; return ret; } diff --git a/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java b/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java index 69af179b8e3..4d0893c4c95 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java @@ -120,7 +120,7 @@ public abstract class DoubleValues { @Override public final double getValueMissing(int docId, double missingValue) { - final int ord = ordinals.getOrd(docId); + final long ord = ordinals.getOrd(docId); if (ord == 0) { return missingValue; } else { @@ -128,7 +128,7 @@ public abstract class DoubleValues { } } - public abstract double getValueByOrd(int ord); + public abstract double getValueByOrd(long ord); @Override public final Iter getIter(int docId) { @@ -184,8 +184,8 @@ public abstract class DoubleValues { static class Multi implements Iter { - private org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter ordsIter; - private int ord; + private Ordinals.Docs.Iter ordsIter; + private long ord; private WithOrdinals values; public Multi(WithOrdinals values) { diff --git a/src/main/java/org/elasticsearch/index/fielddata/LongValues.java b/src/main/java/org/elasticsearch/index/fielddata/LongValues.java index 91e3840fddc..7e19c60e076 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/LongValues.java +++ b/src/main/java/org/elasticsearch/index/fielddata/LongValues.java @@ -118,7 +118,7 @@ public abstract class LongValues { return getValueByOrd(ordinals.getOrd(docId)); } - public abstract long 
getValueByOrd(int ord); + public abstract long getValueByOrd(long ord); @Override public final Iter getIter(int docId) { @@ -127,7 +127,7 @@ public abstract class LongValues { @Override public final long getValueMissing(int docId, long missingValue) { - final int ord = ordinals.getOrd(docId); + final long ord = ordinals.getOrd(docId); if (ord == 0) { return missingValue; } else { @@ -185,7 +185,7 @@ public abstract class LongValues { static class Multi implements Iter { private org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter ordsIter; - private int ord; + private long ord; private WithOrdinals values; public Multi(WithOrdinals values) { diff --git a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java index fe01488f1c3..1d50b66a549 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java +++ b/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefOrdValComparator.java @@ -45,7 +45,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { /* Ords for each slot. @lucene.internal */ - final int[] ords; + final long[] ords; final SortMode sortMode; @@ -75,7 +75,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { /* Bottom ord (same as ords[bottomSlot] once bottomSlot is set). Cached for faster compares. @lucene.internal */ - int bottomOrd; + long bottomOrd; /* True if current bottom slot matches the current reader. 
@@ -92,7 +92,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { public BytesRefOrdValComparator(IndexFieldData.WithOrdinals indexFieldData, int numHits, SortMode sortMode) { this.indexFieldData = indexFieldData; this.sortMode = sortMode; - ords = new int[numHits]; + ords = new long[numHits]; values = new BytesRef[numHits]; readerGen = new int[numHits]; } @@ -100,7 +100,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { @Override public int compare(int slot1, int slot2) { if (readerGen[slot1] == readerGen[slot2]) { - return ords[slot1] - ords[slot2]; + return LongValuesComparator.compare(ords[slot1], ords[slot2]); } final BytesRef val1 = values[slot1]; @@ -207,7 +207,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { final int docOrd = (readerOrds[doc] & 0xFF); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; + return (int) bottomOrd - docOrd; } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in @@ -253,7 +253,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { final int docOrd = (readerOrds[doc] & 0xFFFF); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; + return (int) bottomOrd - docOrd; } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in @@ -299,7 +299,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { final int docOrd = readerOrds[doc]; if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; + return (int) bottomOrd - docOrd; } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in @@ -345,10 +345,10 @@ public final class BytesRefOrdValComparator 
extends FieldComparator { @Override public int compareBottom(int doc) { assert bottomSlot != -1; - final int docOrd = readerOrds.getOrd(doc); + final long docOrd = readerOrds.getOrd(doc); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; + return LongValuesComparator.compare(bottomOrd, docOrd); } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in @@ -361,7 +361,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { @Override public void copy(int slot, int doc) { - final int ord = readerOrds.getOrd(doc); + final long ord = readerOrds.getOrd(doc); ords[slot] = ord; if (ord == 0) { values[slot] = null; @@ -428,7 +428,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; } else { - final int index = binarySearch(termsIndex, bottomValue); + final long index = binarySearch(termsIndex, bottomValue); if (index < 0) { bottomOrd = -index - 2; bottomSameReader = false; @@ -448,15 +448,15 @@ public final class BytesRefOrdValComparator extends FieldComparator { return values[slot]; } - final protected static int binarySearch(BytesValues.WithOrdinals a, BytesRef key) { + final protected static long binarySearch(BytesValues.WithOrdinals a, BytesRef key) { return binarySearch(a, key, 1, a.ordinals().getNumOrds()); } - final protected static int binarySearch(BytesValues.WithOrdinals a, BytesRef key, int low, int high) { + final protected static long binarySearch(BytesValues.WithOrdinals a, BytesRef key, long low, long high) { assert a.getValueByOrd(high) == null | a.getValueByOrd(high) != null; // make sure we actually can get these values assert a.getValueByOrd(low) == null | a.getValueByOrd(low) != null; while (low <= high) { - int mid = (low + high) >>> 1; + long mid = (low + high) >>> 1; BytesRef midVal = a.getValueByOrd(mid); int cmp; 
if (midVal != null) { @@ -488,10 +488,10 @@ public final class BytesRefOrdValComparator extends FieldComparator { @Override public int compareBottom(int doc) throws IOException { - final int docOrd = getRelevantOrd(readerOrds, doc, sortMode); + final long docOrd = getRelevantOrd(readerOrds, doc, sortMode); if (bottomSameReader) { // ord is precisely comparable, even in the equal case - return bottomOrd - docOrd; + return LongValuesComparator.compare(bottomOrd, docOrd); } else if (bottomOrd >= docOrd) { // the equals case always means bottom is > doc // (because we set bottomOrd to the lower bound in @@ -504,7 +504,7 @@ public final class BytesRefOrdValComparator extends FieldComparator { @Override public void copy(int slot, int doc) throws IOException { - final int ord = getRelevantOrd(readerOrds, doc, sortMode); + final long ord = getRelevantOrd(readerOrds, doc, sortMode); ords[slot] = ord; if (ord == 0) { values[slot] = null; @@ -561,14 +561,14 @@ public final class BytesRefOrdValComparator extends FieldComparator { return relevantVal; } - static int getRelevantOrd(Ordinals.Docs readerOrds, int docId, SortMode sortMode) { + static long getRelevantOrd(Ordinals.Docs readerOrds, int docId, SortMode sortMode) { Ordinals.Docs.Iter iter = readerOrds.getIter(docId); - int currentVal = iter.next(); + long currentVal = iter.next(); if (currentVal == 0) { return 0; } - int relevantVal = currentVal; + long relevantVal = currentVal; while (true) { if (sortMode == SortMode.MAX) { if (currentVal > relevantVal) { diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/DocIdOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/DocIdOrdinals.java index a77ed9da675..53c4b078bb6 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/DocIdOrdinals.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/DocIdOrdinals.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.fielddata.ordinals; -import 
org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LongsRef; import org.elasticsearch.common.RamUsage; /** @@ -64,13 +64,13 @@ public class DocIdOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long getNumOrds() { return numDocs; } @Override - public int getMaxOrd() { - return numDocs + 1; + public long getMaxOrd() { + return 1L + numDocs; } @Override @@ -81,7 +81,7 @@ public class DocIdOrdinals implements Ordinals { public static class Docs implements Ordinals.Docs { private final DocIdOrdinals parent; - private final IntsRef intsScratch = new IntsRef(new int[1], 0, 1); + private final LongsRef longsScratch = new LongsRef(new long[1], 0, 1); private final SingleValueIter iter = new SingleValueIter(); public Docs(DocIdOrdinals parent) { @@ -99,12 +99,12 @@ public class DocIdOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long getNumOrds() { return parent.getNumOrds(); } @Override - public int getMaxOrd() { + public long getMaxOrd() { return parent.getMaxOrd(); } @@ -114,14 +114,14 @@ public class DocIdOrdinals implements Ordinals { } @Override - public int getOrd(int docId) { + public long getOrd(int docId) { return docId + 1; } @Override - public IntsRef getOrds(int docId) { - intsScratch.ints[0] = docId + 1; - return intsScratch; + public LongsRef getOrds(int docId) { + longsScratch.longs[0] = docId + 1; + return longsScratch; } @Override diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/EmptyOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/EmptyOrdinals.java index 0657c3cdb60..2aa86df6ca3 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/EmptyOrdinals.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/EmptyOrdinals.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.fielddata.ordinals; -import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LongsRef; /** */ @@ -57,12 +57,12 @@ public class 
EmptyOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long getNumOrds() { return 0; } @Override - public int getMaxOrd() { + public long getMaxOrd() { return 1; } @@ -74,7 +74,7 @@ public class EmptyOrdinals implements Ordinals { public static class Docs implements Ordinals.Docs { private final EmptyOrdinals parent; - public static final IntsRef EMPTY_INTS_REF = new IntsRef(); + public static final LongsRef EMPTY_LONGS_REF = new LongsRef(); public Docs(EmptyOrdinals parent) { this.parent = parent; @@ -91,12 +91,12 @@ public class EmptyOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long getNumOrds() { return 0; } @Override - public int getMaxOrd() { + public long getMaxOrd() { return 1; } @@ -106,13 +106,13 @@ public class EmptyOrdinals implements Ordinals { } @Override - public int getOrd(int docId) { + public long getOrd(int docId) { return 0; } @Override - public IntsRef getOrds(int docId) { - return EMPTY_INTS_REF; + public LongsRef getOrds(int docId) { + return EMPTY_LONGS_REF; } @Override diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiFlatArrayOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiFlatArrayOrdinals.java deleted file mode 100644 index 17332beeb2c..00000000000 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiFlatArrayOrdinals.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.fielddata.ordinals; - -import org.apache.lucene.util.IntsRef; -import org.elasticsearch.common.RamUsage; - -/** - * "Flat" multi valued ordinals, the first level array size is as the maximum - * values a docId has. Ordinals are populated in order from the first flat array - * value to the next. - */ -public final class MultiFlatArrayOrdinals implements Ordinals { - - // ordinals with value 0 indicates no value - private final int[][] ordinals; - private final int numDocs; - private final int numOrds; - private final int maxOrd; - - private long size = -1; - - public MultiFlatArrayOrdinals(int[][] ordinals, int numOrds) { - assert ordinals.length > 0; - this.ordinals = ordinals; - this.numDocs = ordinals[0].length; - this.numOrds = numOrds; - this.maxOrd = numOrds + 1; - } - - @Override - public boolean hasSingleArrayBackingStorage() { - return false; - } - - @Override - public Object getBackingStorage() { - return ordinals; - } - - @Override - public long getMemorySizeInBytes() { - if (size == -1) { - long size = 0; - size += RamUsage.NUM_BYTES_ARRAY_HEADER; // for the top level array - for (int[] ordinal : ordinals) { - size += RamUsage.NUM_BYTES_INT * ordinal.length + RamUsage.NUM_BYTES_ARRAY_HEADER; - } - this.size = size; - } - return size; - } - - @Override - public boolean isMultiValued() { - return true; - } - - @Override - public int getNumDocs() { - return numDocs; - } - - @Override - public int getNumOrds() { - return numOrds; - } - - @Override - public int getMaxOrd() { - return this.maxOrd; - } - 
- @Override - public Docs ordinals() { - return new Docs(this, ordinals); - } - - public static class Docs implements Ordinals.Docs { - - private final MultiFlatArrayOrdinals parent; - private final int[][] ordinals; - private final IterImpl iter; - - private final IntsRef intsScratch; - - public Docs(MultiFlatArrayOrdinals parent, int[][] ordinals) { - this.parent = parent; - this.ordinals = ordinals; - this.iter = new IterImpl(ordinals); - this.intsScratch = new IntsRef(new int[16], 0 , 16); - } - - @Override - public Ordinals ordinals() { - return this.parent; - } - - @Override - public int getNumDocs() { - return parent.getNumDocs(); - } - - @Override - public int getNumOrds() { - return parent.getNumOrds(); - } - - @Override - public int getMaxOrd() { - return parent.getMaxOrd(); - } - - @Override - public boolean isMultiValued() { - return true; - } - - @Override - public int getOrd(int docId) { - return ordinals[0][docId]; - } - - @Override - public IntsRef getOrds(int docId) { - intsScratch.offset = 0; - int i; - for (i = 0; i < ordinals.length; i++) { - int ordinal = ordinals[i][docId]; - if (ordinal == 0) { - if (i == 0) { - intsScratch.length = 0; - return intsScratch; - } - break; - } - intsScratch.grow(i+1); - intsScratch.ints[i] = ordinal; - } - intsScratch.length = i; - return intsScratch; - } - - @Override - public Iter getIter(int docId) { - return iter.reset(docId); - } - - public static class IterImpl implements Docs.Iter { - - private final int[][] ordinals; - private int docId; - private int i; - - public IterImpl(int[][] ordinals) { - this.ordinals = ordinals; - } - - public IterImpl reset(int docId) { - this.docId = docId; - this.i = 0; - return this; - } - - @Override - public int next() { - if (i >= ordinals.length) return 0; - return ordinals[i++][docId]; - } - } - } -} diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiOrdinals.java new 
file mode 100644 index 00000000000..09e0bb8951f --- /dev/null +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/MultiOrdinals.java @@ -0,0 +1,219 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.fielddata.ordinals; + +import org.apache.lucene.util.Version; + +import org.elasticsearch.common.lucene.Lucene; + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.LongsRef; +import org.apache.lucene.util.packed.AppendingLongBuffer; +import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; +import org.apache.lucene.util.packed.PackedInts; +import org.elasticsearch.common.RamUsage; +import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter; + +/** {@link Ordinals} implementation which is efficient at storing field data ordinals for multi-valued or sparse fields. */ +public class MultiOrdinals implements Ordinals { + + // hard-coded in Lucene 4.3 but will be exposed in Lucene 4.4 + static { + assert Lucene.VERSION == Version.LUCENE_43; + } + private static final int OFFSETS_PAGE_SIZE = 1024; + + /** Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%. 
*/ + public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds) { + final int bitsPerOrd = PackedInts.bitsRequired(numOrds); + // Compute the worst-case number of bits per value for offsets in the worst case, eg. if no docs have a value at the + // beginning of the block and all docs have one at the end of the block + final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc; + final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc); + final int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign + final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset; + final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd; + return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes; + } + + private final boolean multiValued; + private final long numOrds; + private final MonotonicAppendingLongBuffer endOffsets; + private final AppendingLongBuffer ords; + + public MultiOrdinals(OrdinalsBuilder builder) { + multiValued = builder.getNumMultiValuesDocs() > 0; + numOrds = builder.getNumOrds(); + endOffsets = new MonotonicAppendingLongBuffer(); + ords = new AppendingLongBuffer(); + long lastEndOffset = 0; + for (int i = 0; i < builder.maxDoc(); ++i) { + final LongsRef docOrds = builder.docOrds(i); + final long endOffset = lastEndOffset + docOrds.length; + endOffsets.add(endOffset); + for (int j = 0; j < docOrds.length; ++j) { + ords.add(docOrds.longs[docOrds.offset + j] - 1); + } + lastEndOffset = endOffset; + } + assert endOffsets.size() == builder.maxDoc(); + assert ords.size() == builder.getTotalNumOrds() : ords.size() + " != " + builder.getTotalNumOrds(); + } + + @Override + public boolean hasSingleArrayBackingStorage() { + return false; + } + + @Override + public Object getBackingStorage() { + return null; + } + + @Override + public long getMemorySizeInBytes() { + return 
endOffsets.ramBytesUsed() + ords.ramBytesUsed(); + } + + @Override + public boolean isMultiValued() { + return multiValued; + } + + @Override + public int getNumDocs() { + return (int) endOffsets.size(); + } + + @Override + public long getNumOrds() { + return numOrds; + } + + @Override + public long getMaxOrd() { + return numOrds + 1; + } + + @Override + public Ordinals.Docs ordinals() { + return new MultiDocs(this); + } + + static class MultiDocs implements Ordinals.Docs { + + private final MultiOrdinals ordinals; + private final MonotonicAppendingLongBuffer endOffsets; + private final AppendingLongBuffer ords; + private final LongsRef longsScratch; + private final MultiIter iter; + + MultiDocs(MultiOrdinals ordinals) { + this.ordinals = ordinals; + this.endOffsets = ordinals.endOffsets; + this.ords = ordinals.ords; + this.longsScratch = new LongsRef(16); + this.iter = new MultiIter(ords); + } + + @Override + public Ordinals ordinals() { + return null; + } + + @Override + public int getNumDocs() { + return ordinals.getNumDocs(); + } + + @Override + public long getNumOrds() { + return ordinals.getNumOrds(); + } + + @Override + public long getMaxOrd() { + return ordinals.getMaxOrd(); + } + + @Override + public boolean isMultiValued() { + return ordinals.isMultiValued(); + } + + @Override + public long getOrd(int docId) { + final long startOffset = docId > 0 ? endOffsets.get(docId - 1) : 0; + final long endOffset = endOffsets.get(docId); + if (startOffset == endOffset) { + return 0L; // ord for missing values + } else { + return 1L + ords.get(startOffset); + } + } + + @Override + public LongsRef getOrds(int docId) { + final long startOffset = docId > 0 ? 
endOffsets.get(docId - 1) : 0; + final long endOffset = endOffsets.get(docId); + final int numValues = (int) (endOffset - startOffset); + if (longsScratch.length < numValues) { + longsScratch.longs = new long[ArrayUtil.oversize(numValues, RamUsage.NUM_BYTES_LONG)]; + } + for (int i = 0; i < numValues; ++i) { + longsScratch.longs[i] = 1L + ords.get(startOffset + i); + } + longsScratch.offset = 0; + longsScratch.length = numValues; + return longsScratch; + } + + @Override + public Iter getIter(int docId) { + final long startOffset = docId > 0 ? endOffsets.get(docId - 1) : 0; + final long endOffset = endOffsets.get(docId); + iter.offset = startOffset; + iter.endOffset = endOffset; + return iter; + } + + } + + static class MultiIter implements Iter { + + final AppendingLongBuffer ordinals; + long offset, endOffset; + + MultiIter(AppendingLongBuffer ordinals) { + this.ordinals = ordinals; + } + + @Override + public long next() { + if (offset >= endOffset) { + return 0L; + } else { + return 1L + ordinals.get(offset++); + } + } + + } + +} diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/Ordinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/Ordinals.java index 1c23e9b0cf9..c7d65e0f1ed 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/Ordinals.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/Ordinals.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.fielddata.ordinals; -import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LongsRef; /** * A thread safe ordinals abstraction. Ordinals can only be positive integers. @@ -54,13 +54,13 @@ public interface Ordinals { /** * The number of ordinals, excluding the "0" ordinal indicating a missing value. */ - int getNumOrds(); + long getNumOrds(); /** * Returns total unique ord count; this includes +1 for * the null ord (always 0). 
*/ - int getMaxOrd(); + long getMaxOrd(); /** * Returns a lightweight (non thread safe) view iterator of the ordinals. @@ -88,13 +88,13 @@ public interface Ordinals { /** * The number of ordinals, excluding the "0" ordinal (indicating a missing value). */ - int getNumOrds(); + long getNumOrds(); /** * Returns total unique ord count; this includes +1 for * the null ord (always 0). */ - int getMaxOrd(); + long getMaxOrd(); /** * Is one of the docs maps to more than one ordinal? @@ -105,13 +105,13 @@ public interface Ordinals { * The ordinal that maps to the relevant docId. If it has no value, returns * 0. */ - int getOrd(int docId); + long getOrd(int docId); /** * Returns an array of ordinals matching the docIds, with 0 length one * for a doc with no ordinals. */ - IntsRef getOrds(int docId); + LongsRef getOrds(int docId); /** * Returns an iterator of the ordinals that match the docId, with an @@ -128,7 +128,7 @@ public interface Ordinals { /** * Gets the next ordinal. Returning 0 if the iteration is exhausted. 
*/ - int next(); + long next(); } static class EmptyIter implements Iter { @@ -136,23 +136,23 @@ public interface Ordinals { public static EmptyIter INSTANCE = new EmptyIter(); @Override - public int next() { + public long next() { return 0; } } static class SingleValueIter implements Iter { - private int value; + private long value; - public SingleValueIter reset(int value) { + public SingleValueIter reset(long value) { this.value = value; return this; } @Override - public int next() { - int actual = value; + public long next() { + long actual = value; value = 0; return actual; } diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java index 235ff6b23c7..7b832d0fe0d 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java @@ -1,4 +1,3 @@ -package org.elasticsearch.index.fielddata.ordinals; /* * Licensed to ElasticSearch and Shay Banon under one * or more contributor license agreements. See the NOTICE file @@ -17,21 +16,21 @@ package org.elasticsearch.index.fielddata.ordinals; * specific language governing permissions and limitations * under the License. 
*/ + +package org.elasticsearch.index.fielddata.ordinals; + import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FilteredTermsEnum; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.*; -import org.apache.lucene.util.IntBlockPool.Allocator; -import org.apache.lucene.util.IntBlockPool.DirectAllocator; import org.apache.lucene.util.packed.GrowableWriter; import org.apache.lucene.util.packed.PackedInts; -import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.apache.lucene.util.packed.XPagedGrowableWriter; import org.elasticsearch.common.settings.Settings; import java.io.Closeable; import java.io.IOException; -import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; /** @@ -41,54 +40,251 @@ import java.util.Comparator; */ public final class OrdinalsBuilder implements Closeable { - private final int maxDoc; - private int[] mvOrds; - private GrowableWriter svOrds; + /** Default acceptable overhead ratio. {@link OrdinalsBuilder} memory usage is mostly transient so it is likely a better trade-off to + * trade memory for speed in order to resize less often. */ + public static final float DEFAULT_ACCEPTABLE_OVERHEAD_RATIO = PackedInts.FAST; - private int[] offsets; - private final IntBlockPool pool; - private final IntBlockPool.SliceWriter writer; - private final IntsRef intsRef = new IntsRef(1); - private final IntBlockPool.SliceReader reader; - private int currentOrd = 0; + /** The following structure is used to store ordinals. The idea is to store ords on levels of increasing sizes. Level 0 stores + * 1 value and 1 pointer to level 1. Level 1 stores 2 values and 1 pointer to level 2, ..., Level n stores 2**n values and + * 1 pointer to level n+1. If at some point an ordinal or a pointer has 0 as a value, this means that there are no remaining + * values. 
On the first level, ordinals.get(docId) is the first ordinal for docId or 0 if the document has no ordinals. On + * subsequent levels, the first 2^level slots are reserved and all have 0 as a value. + *

+     * Example for an index of 3 docs (O=ordinal, P = pointer)
+     * Level 0:
+     *   ordinals           [1] [4] [2]
+     *   nextLevelSlices    2  0  1
+     * Level 1:
+     *   ordinals           [0  0] [2  0] [3  4]
+     *   nextLevelSlices    0  0  1
+     * Level 2:
+     *   ordinals           [0  0  0  0] [5  0  0  0]
+     *   nextLevelSlices    0  0
+     * 
+ * On level 0, all documents have an ordinal: 0 has 1, 1 has 4 and 2 has 2 as a first ordinal, this means that we need to read + * nextLevelEntries to get the index of their ordinals on the next level. The entry for document 1 is 0, meaning that we have + * already read all its ordinals. On the contrary 0 and 2 have more ordinals which are stored at indices 2 and 1. Let's continue + * with document 2: it has 2 more ordinals on level 1: 3 and 4 and its next level index is 1 meaning that there are remaining + * ordinals on the next level. On level 2 at index 1, we can read [5 0 0 0] meaning that 5 is an ordinal as well, but the + * fact that it is followed by zeros means that there are no more ordinals. In the end, document 2 has 2, 3, 4 and 5 as ordinals. + * + * In addition to these structures, there is another array which stores the current position (level + slice + offset in the slice) + * in order to be able to append data in constant time. + */ + private static class OrdinalsStore { + + private static final int PAGE_SIZE = 1 << 12; + + /** Number of slots at level */ + private static int numSlots(int level) { + return 1 << level; + } + + private static int slotsMask(int level) { + return numSlots(level) - 1; + } + + /** Encode the position for the given level and offset. The idea is to encode the level using unary coding in the lower bits and + * then the offset in the higher bits. */ + private static long position(int level, long offset) { + assert level >= 1; + return (1 << (level - 1)) | (offset << level); + } + + /** Decode the level from an encoded position. */ + private static int level(long position) { + return 1 + Long.numberOfTrailingZeros(position); + } + + /** Decode the offset from the position. */ + private static long offset(long position, int level) { + return position >>> level; + } + + /** Get the ID of the slice given an offset. 
*/ + private static long sliceID(int level, long offset) { + return offset >>> level; + } + + /** Compute the first offset of the given slice. */ + private static long startOffset(int level, long slice) { + return slice << level; + } + + /** Compute the number of ordinals stored for a value given its current position. */ + private static int numOrdinals(int level, long offset) { + return (1 << level) + (int) (offset & slotsMask(level)); + } + + // Current position + private XPagedGrowableWriter positions; + // First level (0) of ordinals and pointers to the next level + private final GrowableWriter firstOrdinals; + private XPagedGrowableWriter firstNextLevelSlices; + // Ordinals and pointers for other levels, starting at 1 + private final XPagedGrowableWriter[] ordinals; + private final XPagedGrowableWriter[] nextLevelSlices; + private final int[] sizes; + + private final int startBitsPerValue; + private final float acceptableOverheadRatio; + + OrdinalsStore(int maxDoc, int startBitsPerValue, float acceptableOverheadRatio) { + this.startBitsPerValue = startBitsPerValue; + this.acceptableOverheadRatio = acceptableOverheadRatio; + positions = new XPagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio); + firstOrdinals = new GrowableWriter(startBitsPerValue, maxDoc, acceptableOverheadRatio); + // over allocate in order to never worry about the array sizes, 24 entries would allow to store several millions of ordinals per doc... + ordinals = new XPagedGrowableWriter[24]; + nextLevelSlices = new XPagedGrowableWriter[24]; + sizes = new int[24]; + Arrays.fill(sizes, 1); // reserve the 1st slice on every level + } + + /** Allocate a new slice and return its ID. 
*/ + private long newSlice(int level) { + final long newSlice = sizes[level]++; + // Lazily allocate ordinals + if (ordinals[level] == null) { + ordinals[level] = new XPagedGrowableWriter(8L * numSlots(level), PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio); + } else { + ordinals[level] = ordinals[level].grow(sizes[level] * numSlots(level)); + if (nextLevelSlices[level] != null) { + nextLevelSlices[level] = nextLevelSlices[level].grow(sizes[level]); + } + } + return newSlice; + } + + public int addOrdinal(int docID, long ordinal) { + final long position = positions.get(docID); + + if (position == 0L) { // on the first level + // 0 or 1 ordinal + if (firstOrdinals.get(docID) == 0L) { + firstOrdinals.set(docID, ordinal); + return 1; + } else { + final long newSlice = newSlice(1); + if (firstNextLevelSlices == null) { + firstNextLevelSlices = new XPagedGrowableWriter(firstOrdinals.size(), PAGE_SIZE, 3, acceptableOverheadRatio); + } + firstNextLevelSlices.set(docID, newSlice); + final long offset = startOffset(1, newSlice); + ordinals[1].set(offset, ordinal); + positions.set(docID, position(1, offset)); // current position is on the 1st level and not allocated yet + return 2; + } + } else { + int level = level(position); + long offset = offset(position, level); + assert offset != 0L; + if (((offset + 1) & slotsMask(level)) == 0L) { + // reached the end of the slice, allocate a new one on the next level + final long newSlice = newSlice(level + 1); + if (nextLevelSlices[level] == null) { + nextLevelSlices[level] = new XPagedGrowableWriter(sizes[level], PAGE_SIZE, 1, acceptableOverheadRatio); + } + nextLevelSlices[level].set(sliceID(level, offset), newSlice); + ++level; + offset = startOffset(level, newSlice); + assert (offset & slotsMask(level)) == 0L; + } else { + // just go to the next slot + ++offset; + } + ordinals[level].set(offset, ordinal); + final long newPosition = position(level, offset); + positions.set(docID, newPosition); + return numOrdinals(level, 
offset); + } + } + + public void appendOrdinals(int docID, LongsRef ords) { + // First level + final long firstOrd = firstOrdinals.get(docID); + if (firstOrd == 0L) { + return; + } + ords.longs = ArrayUtil.grow(ords.longs, ords.offset + ords.length + 1); + ords.longs[ords.offset + ords.length++] = firstOrd; + if (firstNextLevelSlices == null) { + return; + } + long sliceID = firstNextLevelSlices.get(docID); + if (sliceID == 0L) { + return; + } + // Other levels + for (int level = 1; ; ++level) { + final int numSlots = numSlots(level); + ords.longs = ArrayUtil.grow(ords.longs, ords.offset + ords.length + numSlots); + final long offset = startOffset(level, sliceID); + for (int j = 0; j < numSlots; ++j) { + final long ord = ordinals[level].get(offset + j); + if (ord == 0L) { + return; + } + ords.longs[ords.offset + ords.length++] = ord; + } + if (nextLevelSlices[level] == null) { + return; + } + sliceID = nextLevelSlices[level].get(sliceID); + if (sliceID == 0L) { + return; + } + } + } + + } + + private final int maxDoc; + private long currentOrd = 0; private int numDocsWithValue = 0; private int numMultiValuedDocs = 0; private int totalNumOrds = 0; - public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, Allocator allocator, float acceptableOverheadRatio) throws IOException { + private OrdinalsStore ordinals; + private final LongsRef spare; + + public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException { this.maxDoc = maxDoc; - if (preDefineBitsRequired) { - int numTerms = (int) terms.size(); - if (numTerms == -1) { - svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio); - } else { - svOrds = new GrowableWriter(PackedInts.bitsRequired(numTerms), maxDoc, acceptableOverheadRatio); - } - } else { - svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio); + int startBitsPerValue = 8; + if (numTerms >= 0) { + startBitsPerValue = PackedInts.bitsRequired(numTerms); } - pool = new 
IntBlockPool(allocator); - reader = new IntBlockPool.SliceReader(pool); - writer = new IntBlockPool.SliceWriter(pool); + ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio); + spare = new LongsRef(); } + public OrdinalsBuilder(int maxDoc, float acceptableOverheadRatio) throws IOException { + this(-1, maxDoc, acceptableOverheadRatio); + } + public OrdinalsBuilder(int maxDoc) throws IOException { - this(null, false, maxDoc, PackedInts.DEFAULT); + this(maxDoc, DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); } - public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, float acceptableOverheadRatio) throws IOException { - this(terms, preDefineBitsRequired, maxDoc, new DirectAllocator(), acceptableOverheadRatio); + /** + * Returns a shared {@link LongsRef} instance for the given doc ID holding all ordinals associated with it. + */ + public LongsRef docOrds(int docID) { + spare.offset = spare.length = 0; + ordinals.appendOrdinals(docID, spare); + return spare; } - public OrdinalsBuilder(Terms terms, int maxDoc, float acceptableOverheadRatio) throws IOException { - this(terms, true, maxDoc, new DirectAllocator(), acceptableOverheadRatio); + /** Return a {@link PackedInts.Reader} instance mapping every doc ID to its first ordinal if it exists and 0 otherwise. */ + public PackedInts.Reader getFirstOrdinals() { + return ordinals.firstOrdinals; } /** * Advances the {@link OrdinalsBuilder} to the next ordinal and * return the current ordinal. */ - public int nextOrdinal() { + public long nextOrdinal() { return ++currentOrd; } @@ -96,7 +292,7 @@ public final class OrdinalsBuilder implements Closeable { * Retruns the current ordinal or 0 if this build has not been advanced via * {@link #nextOrdinal()}. 
*/ - public int currentOrdinal() { + public long currentOrdinal() { return currentOrd; } @@ -105,42 +301,11 @@ public final class OrdinalsBuilder implements Closeable { */ public OrdinalsBuilder addDoc(int doc) { totalNumOrds++; - if (svOrds != null) { - int docsOrd = (int) svOrds.get(doc); - if (docsOrd == 0) { - svOrds.set(doc, currentOrd); - numDocsWithValue++; - } else { - // Rebuilding ords that supports mv based on sv ords. - mvOrds = new int[maxDoc]; - for (int docId = 0; docId < maxDoc; docId++) { - mvOrds[docId] = (int) svOrds.get(docId); - } - svOrds = null; - } - } - - if (mvOrds != null) { - int docsOrd = mvOrds[doc]; - if (docsOrd == 0) { - mvOrds[doc] = currentOrd; - numDocsWithValue++; - } else if (docsOrd > 0) { - numMultiValuedDocs++; - int offset = writer.startNewSlice(); - writer.writeInt(docsOrd); - writer.writeInt(currentOrd); - if (offsets == null) { - offsets = new int[mvOrds.length]; - } - offsets[doc] = writer.getCurrentOffset(); - mvOrds[doc] = (-1 * offset) - 1; - } else { - assert offsets != null; - writer.reset(offsets[doc]); - writer.writeInt(currentOrd); - offsets[doc] = writer.getCurrentOffset(); - } + final int numValues = ordinals.addOrdinal(doc, currentOrd); + if (numValues == 1) { + ++numDocsWithValue; + } else if (numValues == 2) { + ++numMultiValuedDocs; } return this; } @@ -149,7 +314,7 @@ public final class OrdinalsBuilder implements Closeable { * Returns true iff this builder contains a document ID that is associated with more than one ordinal. Otherwise false; */ public boolean isMultiValued() { - return offsets != null; + return numMultiValuedDocs > 0; } /** @@ -183,7 +348,7 @@ public final class OrdinalsBuilder implements Closeable { /** * Returns the number of distinct ordinals in this builder. 
*/ - public int getNumOrds() { + public long getNumOrds() { return currentOrd; } @@ -196,18 +361,9 @@ public final class OrdinalsBuilder implements Closeable { return null; } final FixedBitSet bitSet = new FixedBitSet(maxDoc); - if (svOrds != null) { - for (int docId = 0; docId < maxDoc; docId++) { - int ord = (int) svOrds.get(docId); - if (ord != 0) { - bitSet.set(docId); - } - } - } else { - for (int docId = 0; docId < maxDoc; docId++) { - if (mvOrds[docId] != 0) { - bitSet.set(docId); - } + for (int docID = 0; docID < maxDoc; ++docID) { + if (ordinals.firstOrdinals.get(docID) != 0) { + bitSet.set(docID); } } return bitSet; @@ -217,72 +373,15 @@ public final class OrdinalsBuilder implements Closeable { * Builds an {@link Ordinals} instance from the builders current state. */ public Ordinals build(Settings settings) { - if (numMultiValuedDocs == 0) { - return new SinglePackedOrdinals(svOrds.getMutable(), getNumOrds()); - } - final String multiOrdinals = settings.get("multi_ordinals", "sparse"); - if ("flat".equals(multiOrdinals)) { - final ArrayList ordinalBuffer = new ArrayList(); - for (int i = 0; i < mvOrds.length; i++) { - final IntsRef docOrds = docOrds(i); - while (ordinalBuffer.size() < docOrds.length) { - ordinalBuffer.add(new int[mvOrds.length]); - } - - for (int j = docOrds.offset; j < docOrds.offset+docOrds.length; j++) { - ordinalBuffer.get(j)[i] = docOrds.ints[j]; - } - } - int[][] nativeOrdinals = new int[ordinalBuffer.size()][]; - for (int i = 0; i < nativeOrdinals.length; i++) { - nativeOrdinals[i] = ordinalBuffer.get(i); - } - return new MultiFlatArrayOrdinals(nativeOrdinals, getNumOrds()); - } else if ("sparse".equals(multiOrdinals)) { - int multiOrdinalsMaxDocs = settings.getAsInt("multi_ordinals_max_docs", 16777216 /* Equal to 64MB per storeage array */); - return new SparseMultiArrayOrdinals(this, multiOrdinalsMaxDocs); + final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.COMPACT); + if 
(numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getNumOrds())) { + // MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields + return new MultiOrdinals(this); } else { - throw new ElasticSearchIllegalArgumentException("no applicable fielddata multi_ordinals value, got [" + multiOrdinals + "]"); + return new SinglePackedOrdinals(this, acceptableOverheadRatio); } } - /** - * Returns a shared {@link IntsRef} instance for the given doc ID holding all ordinals associated with it. - */ - public IntsRef docOrds(int doc) { - if (svOrds != null) { - int docsOrd = (int) svOrds.get(doc); - intsRef.offset = 0; - if (docsOrd == 0) { - intsRef.length = 0; - } else if (docsOrd > 0) { - intsRef.ints[0] = docsOrd; - intsRef.length = 1; - } - } else { - int docsOrd = mvOrds[doc]; - intsRef.offset = 0; - if (docsOrd == 0) { - intsRef.length = 0; - } else if (docsOrd > 0) { - intsRef.ints[0] = mvOrds[doc]; - intsRef.length = 1; - } else { - assert offsets != null; - reader.reset(-1 * (mvOrds[doc] + 1), offsets[doc]); - int pos = 0; - while (!reader.endOfSlice()) { - if (intsRef.ints.length <= pos) { - intsRef.ints = ArrayUtil.grow(intsRef.ints, pos + 1); - } - intsRef.ints[pos++] = reader.readInt(); - } - intsRef.length = pos; - } - } - return intsRef; - } - /** * Returns the maximum document ID this builder can associate with an ordinal */ @@ -364,7 +463,6 @@ public final class OrdinalsBuilder implements Closeable { */ @Override public void close() throws IOException { - pool.reset(true, false); - offsets = null; + ordinals = null; } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/PositiveIntPool.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/PositiveIntPool.java deleted file mode 100644 index 48e4fb0f811..00000000000 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/PositiveIntPool.java +++ /dev/null @@ -1,146 +0,0 @@ -package 
org.elasticsearch.index.fielddata.ordinals; - -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.IntsRef; -import org.elasticsearch.ElasticSearchIllegalArgumentException; -import org.elasticsearch.common.RamUsage; - -/** - * An efficient store for positive integer slices. This pool uses multiple - * sliced arrays to hold integers in int array pages rather than an object based - * datastructures. - */ -final class PositiveIntPool { - // TODO it might be useful to store the size of the slices in a sep - // datastructure rather than useing a negative value to donate this. - private final int blockShift; - private final int blockMask; - private final int blockSize; - /** - * array of buffers currently used in the pool. 
Buffers are allocated if - * needed don't modify this outside of this class - */ - private int[][] buffers = new int[10][]; - - /** - * index into the buffers array pointing to the current buffer used as the - * head - */ - private int bufferUpto = -1; - /** Pointer to the current position in head buffer */ - private int intUpto; - /** Current head buffer */ - private int[] buffer; - /** Current head offset */ - private int intOffset; - - - /** - * Creates a new {@link PositiveIntPool} with the given blockShift. - * - * @param blockShift - * the n-the power of two indicating the size of each block in - * the paged datastructure. BlockSize = 1 << blockShift - */ - public PositiveIntPool(int blockShift) { - this.blockShift = blockShift; - this.blockSize = 1 << blockShift; - this.blockMask = blockSize - 1; - this.intUpto = blockSize; - this.intOffset = -blockSize; - } - - /** - * Adds all integers in the given slices and returns the positive offset - * into the datastructure to retrive this slice. - */ - public int put(IntsRef slice) { - if ( slice.length > blockSize) { - throw new ElasticSearchIllegalArgumentException("Can not store slices greater or equal to: " + blockSize); - } - if ((intUpto + slice.length) > blockSize) { - nextBuffer(); - } - final int relativeOffset = intUpto; - System.arraycopy(slice.ints, slice.offset, buffer, relativeOffset, slice.length); - intUpto += slice.length; - buffer[intUpto - 1] *= -1; // mark as end - return relativeOffset + intOffset; - } - - /** - * Returns the first value of the slice stored at the given offset. - *

- * Note: the slice length must be greater than one otherwise the returned - * value is the negative complement of the actual value - *

- */ - public int getFirstFromOffset(int offset) { - final int blockOffset = offset >> blockShift; - final int relativeOffset = offset & blockMask; - final int[] currentBuffer = buffers[blockOffset]; - assert currentBuffer[relativeOffset] >= 0; - return currentBuffer[relativeOffset]; - } - - /** - * Retrieves a previously stored slice from the pool. - * - * @param slice the slice to fill - * @param offset the offset where the slice is stored - */ - public void fill(IntsRef slice, int offset) { - final int blockOffset = offset >> blockShift; - final int relativeOffset = offset & blockMask; - final int[] currentBuffer = buffers[blockOffset]; - slice.offset = 0; - slice.length = 0; - for (int i = relativeOffset; i < currentBuffer.length; i++) { - slice.length++; - if (currentBuffer[i] < 0) { - break; - } - - } - if (slice.length != 0) { - slice.ints = ArrayUtil.grow(slice.ints, slice.length); - System.arraycopy(currentBuffer, relativeOffset, slice.ints, 0, slice.length); - slice.ints[slice.length-1] *= -1; - } - } - - public long getMemorySizeInBytes() { - return ((bufferUpto + 1) * blockSize * RamUsage.NUM_BYTES_INT) + ((bufferUpto + 1) * RamUsage.NUM_BYTES_ARRAY_HEADER); - } - - private void nextBuffer() { - if (1 + bufferUpto == buffers.length) { - int[][] newBuffers = new int[(int) (buffers.length * 1.5)][]; - System.arraycopy(buffers, 0, newBuffers, 0, buffers.length); - buffers = newBuffers; - } - buffer = buffers[1 + bufferUpto] = new int[blockSize]; - bufferUpto++; - intUpto = 0; - intOffset += blockSize; - } - -} diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/SinglePackedOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/SinglePackedOrdinals.java index 175b5ec7950..8647699ddcd 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/SinglePackedOrdinals.java +++ b/src/main/java/org/elasticsearch/index/fielddata/ordinals/SinglePackedOrdinals.java @@ -19,7 +19,7 @@ package 
org.elasticsearch.index.fielddata.ordinals; -import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.common.RamUsage; @@ -29,15 +29,19 @@ public class SinglePackedOrdinals implements Ordinals { // ordinals with value 0 indicates no value private final PackedInts.Reader reader; - private final int numOrds; - private final int maxOrd; + private final long numOrds; + private final long maxOrd; private long size = -1; - public SinglePackedOrdinals(PackedInts.Reader reader, int numOrds) { + public SinglePackedOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) { + assert builder.getNumMultiValuesDocs() == 0; + this.numOrds = builder.getNumOrds(); + this.maxOrd = builder.getNumOrds() + 1; + // We don't reuse the builder as-is because it might have been built with a higher overhead ratio + final PackedInts.Mutable reader = PackedInts.getMutable(builder.maxDoc(), PackedInts.bitsRequired(getNumOrds()), acceptableOverheadRatio); + PackedInts.copy(builder.getFirstOrdinals(), 0, reader, 0, builder.maxDoc(), 8 * 1024); this.reader = reader; - this.numOrds = numOrds; - this.maxOrd = numOrds + 1; } @Override @@ -72,12 +76,12 @@ public class SinglePackedOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long getNumOrds() { return numOrds; } @Override - public int getMaxOrd() { + public long getMaxOrd() { return maxOrd; } @@ -91,7 +95,7 @@ public class SinglePackedOrdinals implements Ordinals { private final SinglePackedOrdinals parent; private final PackedInts.Reader reader; - private final IntsRef intsScratch = new IntsRef(1); + private final LongsRef longsScratch = new LongsRef(1); private final SingleValueIter iter = new SingleValueIter(); public Docs(SinglePackedOrdinals parent, PackedInts.Reader reader) { @@ -110,12 +114,12 @@ public class SinglePackedOrdinals implements Ordinals { } @Override - public int getNumOrds() { + public long 
getNumOrds() { return parent.getNumOrds(); } @Override - public int getMaxOrd() { + public long getMaxOrd() { return parent.getMaxOrd(); } @@ -125,21 +129,21 @@ public class SinglePackedOrdinals implements Ordinals { } @Override - public int getOrd(int docId) { - return (int) reader.get(docId); + public long getOrd(int docId) { + return reader.get(docId); } @Override - public IntsRef getOrds(int docId) { - final int ordinal = (int) reader.get(docId); + public LongsRef getOrds(int docId) { + final long ordinal = reader.get(docId); if (ordinal == 0) { - intsScratch.length = 0; + longsScratch.length = 0; } else { - intsScratch.offset = 0; - intsScratch.length = 1; - intsScratch.ints[0] = ordinal; + longsScratch.offset = 0; + longsScratch.length = 1; + longsScratch.longs[0] = ordinal; } - return intsScratch; + return longsScratch; } @Override diff --git a/src/main/java/org/elasticsearch/index/fielddata/ordinals/SparseMultiArrayOrdinals.java b/src/main/java/org/elasticsearch/index/fielddata/ordinals/SparseMultiArrayOrdinals.java deleted file mode 100644 index 55cd3e5384f..00000000000 --- a/src/main/java/org/elasticsearch/index/fielddata/ordinals/SparseMultiArrayOrdinals.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.fielddata.ordinals; - -import org.apache.lucene.util.IntsRef; -import org.elasticsearch.common.RamUsage; - -/** - * Ordinals implementation that stores the ordinals into sparse fixed arrays. - *

- * This prevents large ordinal arrays that are created in for example {@link MultiFlatArrayOrdinals} when - * only a few documents have a lot of terms per field. - */ -public final class SparseMultiArrayOrdinals implements Ordinals { - - private final int[] lookup; - private final PositiveIntPool pool; - private final int numOrds; - private final int maxOrd; - private final int numDocs; - private long size = -1; - - public SparseMultiArrayOrdinals(OrdinalsBuilder builder, int maxSize) { - int blockShift = Math.min(floorPow2(builder.getTotalNumOrds() << 1), floorPow2(maxSize)); - this.pool = new PositiveIntPool(Math.max(4, blockShift)); - this.numDocs = builder.maxDoc(); - - - this.lookup = new int[numDocs]; - this.numOrds = builder.getNumOrds(); - this.maxOrd = numOrds + 1; - IntsRef spare; - for (int doc = 0; doc < numDocs; doc++) { - spare = builder.docOrds(doc); - final int size = spare.length; - if (size == 0) { - lookup[doc] = 0; - } else if (size == 1) { - lookup[doc] = spare.ints[spare.offset]; - } else { - int offset = pool.put(spare); - lookup[doc] = -(offset) - 1; - } - } - } - - private static int floorPow2(int number) { - return 31 - Integer.numberOfLeadingZeros(number); - } - - @Override - public boolean hasSingleArrayBackingStorage() { - return false; - } - - @Override - public Object getBackingStorage() { - return null; - } - - @Override - public long getMemorySizeInBytes() { - if (size == -1) { - size = (RamUsage.NUM_BYTES_ARRAY_HEADER + (RamUsage.NUM_BYTES_INT * lookup.length)) + pool.getMemorySizeInBytes(); - } - return size; - } - - @Override - public boolean isMultiValued() { - return true; - } - - @Override - public int getNumDocs() { - return numDocs; - } - - @Override - public int getNumOrds() { - return numOrds; - } - - @Override - public int getMaxOrd() { - return maxOrd; - } - - @Override - public Docs ordinals() { - return new Docs(this, lookup, pool); - } - - static class Docs implements Ordinals.Docs { - - private final 
SparseMultiArrayOrdinals parent; - private final int[] lookup; - - private final IterImpl iter; - private final PositiveIntPool pool; - private final IntsRef spare = new IntsRef(1); - - public Docs(SparseMultiArrayOrdinals parent, int[] lookup, PositiveIntPool pool) { - this.parent = parent; - this.lookup = lookup; - this.pool = pool; - this.iter = new IterImpl(lookup, pool); - } - - @Override - public Ordinals ordinals() { - return this.parent; - } - - @Override - public int getNumDocs() { - return parent.getNumDocs(); - } - - @Override - public int getNumOrds() { - return parent.getNumOrds(); - } - - @Override - public int getMaxOrd() { - return parent.getMaxOrd(); - } - - @Override - public boolean isMultiValued() { - return true; - } - - @Override - public int getOrd(int docId) { - int pointer = lookup[docId]; - if (pointer < 0) { - return pool.getFirstFromOffset(-(pointer + 1)); - } - return pointer; - } - - @Override - public IntsRef getOrds(int docId) { - spare.offset = 0; - int pointer = lookup[docId]; - if (pointer == 0) { - spare.length = 0; - } else if (pointer > 0) { - spare.length = 1; - spare.ints[0] = pointer; - return spare; - } else { - pool.fill(spare, -(pointer + 1)); - return spare; - } - return spare; - } - - @Override - public Iter getIter(int docId) { - return iter.reset(docId); - } - - class IterImpl implements Docs.Iter { - private final int[] lookup; - private final PositiveIntPool pool; - private final IntsRef slice = new IntsRef(1); - private int valuesOffset; - - public IterImpl(int[] lookup, PositiveIntPool pool) { - this.lookup = lookup; - this.pool = pool; - } - - public IterImpl reset(int docId) { - final int pointer = lookup[docId]; - if (pointer < 0) { - pool.fill(slice, -(pointer + 1)); - } else { - slice.ints[0] = pointer; - slice.offset = 0; - slice.length = 1; - } - valuesOffset = 0; - return this; - } - - @Override - public int next() { - if (valuesOffset >= slice.length) { - return 0; - } - return slice.ints[slice.offset + 
(valuesOffset++)]; - } - } - } -} diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayAtomicFieldData.java index de603b5b1b4..6b0949d5136 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayAtomicFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayAtomicFieldData.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.RamUsage; +import org.elasticsearch.common.util.BigDoubleArrayList; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.ordinals.Ordinals; @@ -30,14 +31,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData public static final DoubleArrayAtomicFieldData EMPTY = new Empty(); - protected final double[] values; private final int numDocs; protected long size = -1; - public DoubleArrayAtomicFieldData(double[] values, int numDocs) { + public DoubleArrayAtomicFieldData(int numDocs) { super(true); - this.values = values; this.numDocs = numDocs; } @@ -53,7 +52,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData static class Empty extends DoubleArrayAtomicFieldData { Empty() { - super(null, 0); + super(0); } @Override @@ -94,10 +93,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData public static class WithOrdinals extends DoubleArrayAtomicFieldData { + private final BigDoubleArrayList values; private final Ordinals ordinals; - public WithOrdinals(double[] values, int numDocs, Ordinals ordinals) { - super(values, numDocs); + public WithOrdinals(BigDoubleArrayList values, int numDocs, Ordinals ordinals) { + super(numDocs); + this.values = values; this.ordinals = ordinals; } @@ -114,7 +115,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData 
@Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + +RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE) + ordinals.getMemorySizeInBytes(); + size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes(); } return size; } @@ -133,31 +134,31 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals { - private final double[] values; + private final BigDoubleArrayList values; - LongValues(double[] values, Ordinals.Docs ordinals) { + LongValues(BigDoubleArrayList values, Ordinals.Docs ordinals) { super(ordinals); this.values = values; } @Override - public final long getValueByOrd(int ord) { - return (long) values[ord]; + public final long getValueByOrd(long ord) { + return (long) values.get(ord); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals { - private final double[] values; + private final BigDoubleArrayList values; - DoubleValues(double[] values, Ordinals.Docs ordinals) { + DoubleValues(BigDoubleArrayList values, Ordinals.Docs ordinals) { super(ordinals); this.values = values; } @Override - public double getValueByOrd(int ord) { - return values[ord]; + public double getValueByOrd(long ord) { + return values.get(ord); } } } @@ -168,10 +169,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData */ public static class SingleFixedSet extends DoubleArrayAtomicFieldData { + private final BigDoubleArrayList values; private final FixedBitSet set; - public SingleFixedSet(double[] values, int numDocs, FixedBitSet set) { - super(values, numDocs); + public SingleFixedSet(BigDoubleArrayList values, int numDocs, FixedBitSet set) { + super(numDocs); + this.values = values; this.set = set; } @@ -188,7 
+191,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData @Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE) + (set.getBits().length * RamUsage.NUM_BYTES_LONG); + size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + (set.getBits().length * RamUsage.NUM_BYTES_LONG); } return size; } @@ -205,10 +208,10 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData static class LongValues extends org.elasticsearch.index.fielddata.LongValues { - private final double[] values; + private final BigDoubleArrayList values; private final FixedBitSet set; - LongValues(double[] values, FixedBitSet set) { + LongValues(BigDoubleArrayList values, FixedBitSet set) { super(false); this.values = values; this.set = set; @@ -221,16 +224,16 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData @Override public long getValue(int docId) { - return (long) values[docId]; + return (long) values.get(docId); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues { - private final double[] values; + private final BigDoubleArrayList values; private final FixedBitSet set; - DoubleValues(double[] values, FixedBitSet set) { + DoubleValues(BigDoubleArrayList values, FixedBitSet set) { super(false); this.values = values; this.set = set; @@ -243,7 +246,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData @Override public double getValue(int docId) { - return values[docId]; + return values.get(docId); } } @@ -254,12 +257,15 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData */ public static class Single extends DoubleArrayAtomicFieldData { + private final BigDoubleArrayList values; + /** * Note, here, we assume that there is no offset by 1 from docId, so position 0 * is the value for docId 0. 
*/ - public Single(double[] values, int numDocs) { - super(values, numDocs); + public Single(BigDoubleArrayList values, int numDocs) { + super(numDocs); + this.values = values; } @Override @@ -275,7 +281,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData @Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE); + size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes(); } return size; } @@ -292,32 +298,32 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense { - private final double[] values; + private final BigDoubleArrayList values; - LongValues(double[] values) { + LongValues(BigDoubleArrayList values) { super(false); this.values = values; } @Override public long getValue(int docId) { - return (long) values[docId]; + return (long) values.get(docId); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense { - private final double[] values; + private final BigDoubleArrayList values; - DoubleValues(double[] values) { + DoubleValues(BigDoubleArrayList values) { super(false); this.values = values; } @Override public double getValue(int docId) { - return values[docId]; + return values.get(docId); } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java index 8747fc2e214..dbd5cf030a9 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.fielddata.plain; -import gnu.trove.list.array.TDoubleArrayList; import org.apache.lucene.index.AtomicReader; import 
org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Terms; @@ -27,11 +26,11 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.BigDoubleArrayList; import org.elasticsearch.index.Index; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource; @@ -49,7 +48,7 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) { return new DoubleArrayIndexFieldData(index, indexSettings, fieldNames, type, cache); } } @@ -92,11 +91,11 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData fst; @@ -104,18 +102,17 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals fstEnum = new BytesRefFSTEnum(fst); - int[] hashes = new int[ordinals.getMaxOrd()]; - InputOutput next; + BigIntArray hashes = new BigIntArray(ordinals.getMaxOrd()); // we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support // empty strings twice. ie. them merge fails for long output. 
- hashes[0] = new BytesRef().hashCode(); - int i = 1; + hashes.set(0, new BytesRef().hashCode()); try { - while ((next = fstEnum.next()) != null) { - hashes[i++] = next.input.hashCode(); + for (long i = 1, maxOrd = ordinals.getMaxOrd(); i < maxOrd; ++i) { + hashes.set(i, fstEnum.next().input.hashCode()); } - } catch (IOException ex) { - //bogus + assert fstEnum.next() == null; + } catch (IOException e) { + throw new AssertionError("Cannot happen", e); } this.hashes = hashes; } @@ -141,7 +138,7 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals fst, Docs ordinals, int[] hashes) { + SingleHashed(FST fst, Docs ordinals, BigIntArray hashes) { super(fst, ordinals); this.hashes = hashes; } @@ -188,16 +185,16 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals fst, Docs ordinals, int[] hashes) { + MultiHashed(FST fst, Docs ordinals, BigIntArray hashes) { super(fst, ordinals); this.hashes = hashes; } @@ -230,16 +227,16 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals fstBuilder = new org.apache.lucene.util.fst.Builder(INPUT_TYPE.BYTE1, outputs); final IntsRef scratch = new IntsRef(); - boolean preDefineBitsRequired = regex == null && frequency == null; - final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT); - OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio); + final long numTerms; + if (regex == null && frequency == null) { + numTerms = terms.size(); + } else { + numTerms = -1; + } + final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); + OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio); try { // we don't store an ord 0 in the FST since we could have an empty string in 
there and FST don't support @@ -75,7 +79,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData 0; fstBuilder.add(Util.toIntsRef(term, scratch), (long)termOrd); docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE); diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayAtomicFieldData.java index 9e1106e625e..c4048d77558 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayAtomicFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayAtomicFieldData.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.RamUsage; +import org.elasticsearch.common.util.BigFloatArrayList; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.ordinals.Ordinals; @@ -30,14 +31,12 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { public static final FloatArrayAtomicFieldData EMPTY = new Empty(); - protected final float[] values; private final int numDocs; protected long size = -1; - public FloatArrayAtomicFieldData(float[] values, int numDocs) { + public FloatArrayAtomicFieldData(int numDocs) { super(true); - this.values = values; this.numDocs = numDocs; } @@ -53,7 +52,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { static class Empty extends FloatArrayAtomicFieldData { Empty() { - super(null, 0); + super(0); } @Override @@ -95,9 +94,11 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { public static class WithOrdinals extends FloatArrayAtomicFieldData { private final Ordinals ordinals; + private final BigFloatArrayList values; - public WithOrdinals(float[] values, int numDocs, Ordinals ordinals) { - super(values, numDocs); + public WithOrdinals(BigFloatArrayList values, int 
numDocs, Ordinals ordinals) { + super(numDocs); + this.values = values; this.ordinals = ordinals; } @@ -114,7 +115,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { @Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT) + ordinals.getMemorySizeInBytes(); + size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes(); } return size; } @@ -131,31 +132,31 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals { - private final float[] values; + private final BigFloatArrayList values; - LongValues(float[] values, Ordinals.Docs ordinals) { + LongValues(BigFloatArrayList values, Ordinals.Docs ordinals) { super(ordinals); this.values = values; } @Override - public long getValueByOrd(int ord) { - return (long) values[ord]; + public long getValueByOrd(long ord) { + return (long) values.get(ord); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals { - private final float[] values; + private final BigFloatArrayList values; - DoubleValues(float[] values, Ordinals.Docs ordinals) { + DoubleValues(BigFloatArrayList values, Ordinals.Docs ordinals) { super(ordinals); this.values = values; } @Override - public double getValueByOrd(int ord) { - return values[ord]; + public double getValueByOrd(long ord) { + return values.get(ord); } } } @@ -166,10 +167,12 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { */ public static class SingleFixedSet extends FloatArrayAtomicFieldData { + private final BigFloatArrayList values; private final FixedBitSet set; - public SingleFixedSet(float[] values, int numDocs, FixedBitSet set) { - 
super(values, numDocs); + public SingleFixedSet(BigFloatArrayList values, int numDocs, FixedBitSet set) { + super(numDocs); + this.values = values; this.set = set; } @@ -186,7 +189,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { @Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT) + (set.getBits().length * RamUsage.NUM_BYTES_LONG); + size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + (set.getBits().length * RamUsage.NUM_BYTES_LONG); } return size; } @@ -204,10 +207,10 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { static class LongValues extends org.elasticsearch.index.fielddata.LongValues { - private final float[] values; + private final BigFloatArrayList values; private final FixedBitSet set; - LongValues(float[] values, FixedBitSet set) { + LongValues(BigFloatArrayList values, FixedBitSet set) { super(false); this.values = values; this.set = set; @@ -220,16 +223,16 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { @Override public long getValue(int docId) { - return (long) values[docId]; + return (long) values.get(docId); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues { - private final float[] values; + private final BigFloatArrayList values; private final FixedBitSet set; - DoubleValues(float[] values, FixedBitSet set) { + DoubleValues(BigFloatArrayList values, FixedBitSet set) { super(false); this.values = values; this.set = set; @@ -242,7 +245,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { @Override public double getValue(int docId) { - return (double) values[docId]; + return (double) values.get(docId); } } @@ -254,12 +257,15 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { */ public static class Single extends 
FloatArrayAtomicFieldData { + private final BigFloatArrayList values; + /** * Note, here, we assume that there is no offset by 1 from docId, so position 0 * is the value for docId 0. */ - public Single(float[] values, int numDocs) { - super(values, numDocs); + public Single(BigFloatArrayList values, int numDocs) { + super(numDocs); + this.values = values; } @Override @@ -275,7 +281,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { @Override public long getMemorySizeInBytes() { if (size == -1) { - size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT); + size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes(); } return size; } @@ -293,32 +299,32 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData { static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense { - private final float[] values; + private final BigFloatArrayList values; - LongValues(float[] values) { + LongValues(BigFloatArrayList values) { super(false); this.values = values; } @Override public long getValue(int docId) { - return (long) values[docId]; + return (long) values.get(docId); } } static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense { - private final float[] values; + private final BigFloatArrayList values; - DoubleValues(float[] values) { + DoubleValues(BigFloatArrayList values) { super(false); this.values = values; } @Override public double getValue(int docId) { - return (double) values[docId]; + return (double) values.get(docId); } } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayIndexFieldData.java index 7c71d0897fb..80c1d1ba51e 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/FloatArrayIndexFieldData.java @@ -19,7 
+19,6 @@ package org.elasticsearch.index.fielddata.plain; -import gnu.trove.list.array.TFloatArrayList; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Terms; @@ -27,11 +26,11 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.util.packed.PackedInts; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.BigFloatArrayList; import org.elasticsearch.index.Index; import org.elasticsearch.index.fielddata.*; import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource; @@ -49,7 +48,7 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) { return new FloatArrayIndexFieldData(index, indexSettings, fieldNames, type, cache); } } @@ -91,12 +90,12 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) { return new GeoPointDoubleArrayIndexFieldData(index, indexSettings, fieldNames, type, cache); } } @@ -83,12 +82,12 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData 0) { sValues.set(i, values.get(ord - 1) - minValue); } diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java index 00dc50eda5b..16b7c37b61e 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java +++ 
b/src/main/java/org/elasticsearch/index/fielddata/plain/PagedBytesAtomicFieldData.java @@ -23,6 +23,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.PagedBytes.Reader; import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer; +import org.elasticsearch.common.util.BigIntArray; import org.elasticsearch.index.fielddata.AtomicFieldData; import org.elasticsearch.index.fielddata.ScriptDocValues; import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals; @@ -42,7 +43,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals termCountHardLimit) { - // app is misusing the API (there is more than - // one term per doc); in this case we make best - // effort to load what we can (see LUCENE-2142) - numUniqueTerms = termCountHardLimit; - } - } - final MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer(); termOrdToBytesOffset.add(0); // first ord is reserved for missing values - boolean preDefineBitsRequired = regex == null && frequency == null; - final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT); - OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio); + final long numTerms; + if (regex == null && frequency == null) { + numTerms = terms.size(); + } else { + numTerms = -1; + } + final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO); + OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio); try { // 0 is reserved for "unset" bytes.copyUsingLengthPrefix(new BytesRef()); TermsEnum termsEnum = filter(terms, reader); DocsEnum docsEnum = null; for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { - final int 
termOrd = builder.nextOrdinal(); + final long termOrd = builder.nextOrdinal(); assert termOrd == termOrdToBytesOffset.size(); termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term)); docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetExecutor.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetExecutor.java index a157e114f8c..72d5e09e1a3 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetExecutor.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetExecutor.java @@ -27,6 +27,8 @@ import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.cache.recycler.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; +import org.elasticsearch.common.util.IntArray; +import org.elasticsearch.common.util.IntArrays; import org.elasticsearch.index.fielddata.BytesValues; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.ordinals.Ordinals; @@ -114,7 +116,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it... (can we get around this?) 
int count = 0; do { - count += agg.counts[agg.position]; + count += agg.counts.get(agg.position); if (agg.nextPosition()) { agg = queue.updateTop(); } else { @@ -144,12 +146,6 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { list[i] = (InternalStringTermsFacet.TermEntry) ordered.pop(); } - for (ReaderAggregator aggregator : aggregators) { - if (aggregator.counts.length > ordinalsCacheAbove) { - cacheRecycler.pushIntArray(aggregator.counts); - } - } - return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total); } @@ -160,7 +156,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it... (can we work around that?) int count = 0; do { - count += agg.counts[agg.position]; + count += agg.counts.get(agg.position); if (agg.nextPosition()) { agg = queue.updateTop(); } else { @@ -186,13 +182,6 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { } } - - for (ReaderAggregator aggregator : aggregators) { - if (aggregator.counts.length > ordinalsCacheAbove) { - cacheRecycler.pushIntArray(aggregator.counts); - } - } - return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing, total); } @@ -207,8 +196,8 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { @Override public void setNextReader(AtomicReaderContext context) throws IOException { if (current != null) { - missing += current.counts[0]; - total += current.total - current.counts[0]; + missing += current.counts.get(0); + total += current.total - current.counts.get(0); if (current.values.ordinals().getNumOrds() > 0) { aggregators.add(current); } @@ -221,7 +210,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { @Override public void collect(int doc) throws IOException { Iter iter = ordinals.getIter(doc); - int ord = iter.next(); + long ord = 
iter.next(); current.onOrdinal(doc, ord); while ((ord = iter.next()) != 0) { current.onOrdinal(doc, ord); @@ -231,8 +220,8 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { @Override public void postCollection() { if (current != null) { - missing += current.counts[0]; - total += current.total - current.counts[0]; + missing += current.counts.get(0); + total += current.total - current.counts.get(0); // if we have values for this one, add it if (current.values.ordinals().getNumOrds() > 0) { aggregators.add(current); @@ -247,26 +236,21 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor { public static final class ReaderAggregator { final BytesValues.WithOrdinals values; - final int[] counts; + final IntArray counts; - int position = 0; + long position = 0; BytesRef current; int total; - private final int maxOrd; + private final long maxOrd; public ReaderAggregator(BytesValues.WithOrdinals values, int ordinalsCacheLimit, CacheRecycler cacheRecycler) { this.values = values; this.maxOrd = values.ordinals().getMaxOrd(); - - if (maxOrd > ordinalsCacheLimit) { - this.counts = cacheRecycler.popIntArray(maxOrd); - } else { - this.counts = new int[maxOrd]; - } + this.counts = IntArrays.allocate(maxOrd); } - final void onOrdinal(int docId, int ordinal) { - counts[ordinal]++; + final void onOrdinal(int docId, long ordinal) { + counts.increment(ordinal, 1); total++; } diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/FilterFieldDataTest.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/FilterFieldDataTest.java index 644eec3cb01..72f65634b6c 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/fielddata/FilterFieldDataTest.java +++ b/src/test/java/org/elasticsearch/test/unit/index/fielddata/FilterFieldDataTest.java @@ -82,7 +82,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); 
BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(2, equalTo(ordinals.getNumOrds())); + assertThat(2L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10")); assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100")); @@ -95,7 +95,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(1, equalTo(ordinals.getNumOrds())); + assertThat(1L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("5")); } @@ -108,7 +108,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(2, equalTo(ordinals.getNumOrds())); + assertThat(2L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10")); assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100")); @@ -122,7 +122,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(2, equalTo(ordinals.getNumOrds())); + assertThat(2L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10")); 
assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100")); @@ -139,7 +139,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(1, equalTo(ordinals.getNumOrds())); + assertThat(1L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("100")); } @@ -184,7 +184,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(1, equalTo(ordinals.getNumOrds())); + assertThat(1L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("5")); } @@ -196,7 +196,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests { AtomicFieldData.WithOrdinals loadDirect = (WithOrdinals) fieldData.loadDirect(context); BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues(); Docs ordinals = bytesValues.ordinals(); - assertThat(2, equalTo(ordinals.getNumOrds())); + assertThat(2L, equalTo(ordinals.getNumOrds())); assertThat(1000, equalTo(ordinals.getNumDocs())); assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10")); assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("5")); diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/FlatMultiOrdinalsTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/FlatMultiOrdinalsTests.java deleted file mode 100644 index 840e4b8c6d1..00000000000 --- 
a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/FlatMultiOrdinalsTests.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.test.unit.index.fielddata.ordinals; - -import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.index.fielddata.ordinals.Ordinals; -import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder; - -/** - */ -public class FlatMultiOrdinalsTests extends MultiOrdinalsTests { - - @Override - protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) { - settings.put("multi_ordinals", "flat"); - return builder.build(settings.build()); - } -} diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/MultiOrdinalsTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/MultiOrdinalsTests.java index 880386d8822..576b61ba34c 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/MultiOrdinalsTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/MultiOrdinalsTests.java @@ -19,8 +19,9 @@ package 
org.elasticsearch.test.unit.index.fielddata.ordinals; -import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.LongsRef; import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.index.fielddata.ordinals.MultiOrdinals; import org.elasticsearch.index.fielddata.ordinals.Ordinals; import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder; import org.testng.annotations.Test; @@ -30,18 +31,20 @@ import java.util.*; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; /** */ -public abstract class MultiOrdinalsTests { +public class MultiOrdinalsTests { protected final Ordinals creationMultiOrdinals(OrdinalsBuilder builder) { return this.creationMultiOrdinals(builder, ImmutableSettings.builder()); } - protected abstract Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings); + protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) { + return builder.build(settings.build()); + } + @Test public void testRandomValues() throws IOException { @@ -74,7 +77,7 @@ public abstract class MultiOrdinalsTests { return 1; } }); - int lastOrd = -1; + long lastOrd = -1; for (OrdAndId ordAndId : ordsAndIds) { if (lastOrd != ordAndId.ord) { lastOrd = ordAndId.ord; @@ -105,27 +108,27 @@ public abstract class MultiOrdinalsTests { Ordinals ords = creationMultiOrdinals(builder); Ordinals.Docs docs = ords.ordinals(); int docId = ordsAndIds.get(0).id; - List docOrds = new ArrayList(); + List docOrds = new ArrayList(); for (OrdAndId ordAndId : ordsAndIds) { if (docId == ordAndId.id) { docOrds.add(ordAndId.ord); } else { if (!docOrds.isEmpty()) { assertThat(docs.getOrd(docId), equalTo(docOrds.get(0))); - IntsRef ref = docs.getOrds(docId); + LongsRef ref = docs.getOrds(docId); assertThat(ref.offset, equalTo(0)); for (int i = ref.offset; i < ref.length; i++) { - 
assertThat(ref.ints[i], equalTo(docOrds.get(i))); + assertThat(ref.longs[i], equalTo(docOrds.get(i))); } - final int[] array = new int[docOrds.size()]; + final long[] array = new long[docOrds.size()]; for (int i = 0; i < array.length; i++) { array[i] = docOrds.get(i); } assertIter(docs.getIter(docId), array); } for (int i = docId + 1; i < ordAndId.id; i++) { - assertThat(docs.getOrd(i), equalTo(0)); + assertThat(docs.getOrd(i), equalTo(0L)); } docId = ordAndId.id; docOrds.clear(); @@ -137,10 +140,10 @@ public abstract class MultiOrdinalsTests { } public static class OrdAndId { - final int ord; + final long ord; final int id; - public OrdAndId(int ord, int id) { + public OrdAndId(long ord, int id) { this.ord = ord; this.id = id; } @@ -150,7 +153,7 @@ public abstract class MultiOrdinalsTests { final int prime = 31; int result = 1; result = prime * result + id; - result = prime * result + ord; + result = prime * result + (int) ord; return result; } @@ -174,7 +177,7 @@ public abstract class MultiOrdinalsTests { @Test public void testOrdinals() throws Exception { int maxDoc = 7; - int maxOrds = 32; + long maxOrds = 32; OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc); builder.nextOrdinal(); // 1 builder.addDoc(1).addDoc(4).addDoc(5).addDoc(6); @@ -186,97 +189,99 @@ public abstract class MultiOrdinalsTests { builder.addDoc(0).addDoc(4).addDoc(5).addDoc(6); builder.nextOrdinal(); // 5 builder.addDoc(4).addDoc(5).addDoc(6); - int ord = builder.nextOrdinal(); // 6 + long ord = builder.nextOrdinal(); // 6 builder.addDoc(4).addDoc(5).addDoc(6); - for (int i = ord; i < maxOrds; i++) { + for (long i = ord; i < maxOrds; i++) { builder.nextOrdinal(); builder.addDoc(5).addDoc(6); } - + + long[][] ordinalPlan = new long[][] { + {2, 4}, + {1}, + {3}, + {}, + {1, 3, 4, 5, 6}, + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}, + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32} + }; Ordinals ordinals 
= creationMultiOrdinals(builder); Ordinals.Docs docs = ordinals.ordinals(); - assertThat(docs.getNumDocs(), equalTo(maxDoc)); - assertThat(docs.getNumOrds(), equalTo(maxOrds)); - assertThat(docs.getMaxOrd(), equalTo(maxOrds + 1)); // Includes null ord - assertThat(docs.isMultiValued(), equalTo(true)); - assertThat(ordinals.getMemorySizeInBytes(), greaterThan(0l)); - - // Document 1 - assertThat(docs.getOrd(0), equalTo(2)); - IntsRef ref = docs.getOrds(0); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(2)); - assertThat(ref.ints[1], equalTo(4)); - assertThat(ref.length, equalTo(2)); - assertIter(docs.getIter(0), 2, 4); - - // Document 2 - assertThat(docs.getOrd(1), equalTo(1)); - ref = docs.getOrds(1); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(1)); - assertThat(ref.length, equalTo(1)); - assertIter(docs.getIter(1), 1); - - // Document 3 - assertThat(docs.getOrd(2), equalTo(3)); - ref = docs.getOrds(2); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(3)); - assertThat(ref.length, equalTo(1)); - assertIter(docs.getIter(2), 3); - - // Document 4 - assertThat(docs.getOrd(3), equalTo(0)); - ref = docs.getOrds(3); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.length, equalTo(0)); - assertIter(docs.getIter(3)); - - // Document 5 - assertThat(docs.getOrd(4), equalTo(1)); - ref = docs.getOrds(4); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(1)); - assertThat(ref.ints[1], equalTo(3)); - assertThat(ref.ints[2], equalTo(4)); - assertThat(ref.ints[3], equalTo(5)); - assertThat(ref.ints[4], equalTo(6)); - assertThat(ref.length, equalTo(5)); - assertIter(docs.getIter(4), 1, 3, 4, 5, 6); - - // Document 6 - assertThat(docs.getOrd(5), equalTo(1)); - ref = docs.getOrds(5); - assertThat(ref.offset, equalTo(0)); - int[] expectedOrds = new int[maxOrds]; - for (int i = 0; i < maxOrds; i++) { - expectedOrds[i] = i + 1; - assertThat(ref.ints[i], equalTo(i + 1)); - } - 
assertIter(docs.getIter(5), expectedOrds); - assertThat(ref.length, equalTo(maxOrds)); - - // Document 7 - assertThat(docs.getOrd(6), equalTo(1)); - ref = docs.getOrds(6); - assertThat(ref.offset, equalTo(0)); - expectedOrds = new int[maxOrds]; - for (int i = 0; i < maxOrds; i++) { - expectedOrds[i] = i + 1; - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertIter(docs.getIter(6), expectedOrds); - assertThat(ref.length, equalTo(maxOrds)); + assertEquals(docs, ordinalPlan); } - protected static void assertIter(Ordinals.Docs.Iter iter, int... expectedOrdinals) { - for (int expectedOrdinal : expectedOrdinals) { + protected static void assertIter(Ordinals.Docs.Iter iter, long... expectedOrdinals) { + for (long expectedOrdinal : expectedOrdinals) { assertThat(iter.next(), equalTo(expectedOrdinal)); } - assertThat(iter.next(), equalTo(0)); // Last one should always be 0 - assertThat(iter.next(), equalTo(0)); // Just checking it stays 0 + assertThat(iter.next(), equalTo(0L)); // Last one should always be 0 + assertThat(iter.next(), equalTo(0L)); // Just checking it stays 0 + } + + @Test + public void testMultiValuesDocsWithOverlappingStorageArrays() throws Exception { + int maxDoc = 7; + long maxOrds = 15; + OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc); + for (int i = 0; i < maxOrds; i++) { + builder.nextOrdinal(); + if (i < 10) { + builder.addDoc(0); + } + builder.addDoc(1); + if (i == 0) { + builder.addDoc(2); + } + if (i < 5) { + builder.addDoc(3); + + } + if (i < 6) { + builder.addDoc(4); + + } + if (i == 1) { + builder.addDoc(5); + } + if (i < 10) { + builder.addDoc(6); + } + } + + long[][] ordinalPlan = new long[][] { + {1,2,3,4,5,6,7,8,9,10}, + {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}, + {1}, + {1,2,3,4,5}, + {1,2,3,4,5,6}, + {2}, + {1,2,3,4,5,6,7,8,9,10} + }; + + Ordinals ordinals = new MultiOrdinals(builder); + Ordinals.Docs docs = ordinals.ordinals(); + assertEquals(docs, ordinalPlan); + } + + private void assertEquals(Ordinals.Docs docs, long[][] 
ordinalPlan) { + long numOrds = 0; + for (int doc = 0; doc < ordinalPlan.length; ++doc) { + if (ordinalPlan[doc].length > 0) { + numOrds = Math.max(numOrds, ordinalPlan[doc][ordinalPlan[doc].length - 1]); + } + } + assertThat(docs.getNumDocs(), equalTo(ordinalPlan.length)); + assertThat(docs.getNumOrds(), equalTo(numOrds)); // Includes null ord + assertThat(docs.getMaxOrd(), equalTo(numOrds + 1)); + assertThat(docs.isMultiValued(), equalTo(true)); + for (int doc = 0; doc < ordinalPlan.length; ++doc) { + LongsRef ref = docs.getOrds(doc); + assertThat(ref.offset, equalTo(0)); + long[] ords = ordinalPlan[doc]; + assertThat(ref, equalTo(new LongsRef(ords, 0, ords.length))); + assertIter(docs.getIter(doc), ords); + } } } diff --git a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SingleOrdinalsTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SingleOrdinalsTests.java index 07fb09a38d3..69262ea0ea1 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SingleOrdinalsTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SingleOrdinalsTests.java @@ -40,9 +40,9 @@ public class SingleOrdinalsTests { public void testSvValues() throws IOException { int numDocs = 1000000; int numOrdinals = numDocs / 4; - Map controlDocToOrdinal = new HashMap(); + Map controlDocToOrdinal = new HashMap(); OrdinalsBuilder builder = new OrdinalsBuilder(numDocs); - int ordinal = builder.nextOrdinal(); + long ordinal = builder.nextOrdinal(); for (int doc = 0; doc < numDocs; doc++) { if (doc % numOrdinals == 0) { ordinal = builder.nextOrdinal(); @@ -56,7 +56,7 @@ public class SingleOrdinalsTests { Ordinals.Docs docs = ords.ordinals(); assertThat(controlDocToOrdinal.size(), equalTo(docs.getNumDocs())); - for (Map.Entry entry : controlDocToOrdinal.entrySet()) { + for (Map.Entry entry : controlDocToOrdinal.entrySet()) { assertThat(entry.getValue(), equalTo(docs.getOrd(entry.getKey()))); } diff --git 
a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SparseMultiOrdinalsTests.java b/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SparseMultiOrdinalsTests.java deleted file mode 100644 index a56c38b37c7..00000000000 --- a/src/test/java/org/elasticsearch/test/unit/index/fielddata/ordinals/SparseMultiOrdinalsTests.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.test.unit.index.fielddata.ordinals; - -import org.apache.lucene.util.IntsRef; -import org.elasticsearch.ElasticSearchException; -import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.common.settings.ImmutableSettings.Builder; -import org.elasticsearch.index.fielddata.ordinals.Ordinals; -import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder; -import org.elasticsearch.index.fielddata.ordinals.SparseMultiArrayOrdinals; -import org.testng.annotations.Test; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; -import static org.testng.Assert.fail; - -/** - */ -public class SparseMultiOrdinalsTests extends MultiOrdinalsTests { - - @Override - protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) { - settings.put("multi_ordinals", "sparse"); - return builder.build(settings.build()); - } - - @Test - public void testMultiValuesSurpassOrdinalsLimit() throws Exception { - OrdinalsBuilder builder = new OrdinalsBuilder(2); - int maxOrds = 128; - for (int i = 0; i < maxOrds; i++) { - builder.nextOrdinal(); - if (i == 2 || i == 4) { - builder.addDoc(0); - } - builder.addDoc(1); - - } - - try { - Builder builder2 = ImmutableSettings.builder(); - builder2.put("multi_ordinals_max_docs", 64); - creationMultiOrdinals(builder, builder2); - fail("Exception should have been throwed"); - } catch (ElasticSearchException e) { - - } - } - - @Test - public void testMultiValuesDocsWithOverlappingStorageArrays() throws Exception { - int maxDoc = 7; - int maxOrds = 15; - OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc); - for (int i = 0; i < maxOrds; i++) { - builder.nextOrdinal(); - if (i < 10) { - builder.addDoc(0); - } - builder.addDoc(1); - if (i == 0) { - builder.addDoc(2); - } - if (i < 5) { - builder.addDoc(3); - - } - if (i < 6) { - builder.addDoc(4); - - } - if (i == 1) { - builder.addDoc(5); - } - if (i < 10) 
{ - builder.addDoc(6); - } - } - - Ordinals ordinals = new SparseMultiArrayOrdinals(builder, 64); - Ordinals.Docs docs = ordinals.ordinals(); - assertThat(docs.getNumDocs(), equalTo(maxDoc)); - assertThat(docs.getNumOrds(), equalTo(maxOrds)); // Includes null ord - assertThat(docs.isMultiValued(), equalTo(true)); - - // Document 1 - assertThat(docs.getOrd(0), equalTo(1)); - IntsRef ref = docs.getOrds(0); - assertThat(ref.offset, equalTo(0)); - for (int i = 0; i < 10; i++) { - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertThat(ref.length, equalTo(10)); - - // Document 2 - assertThat(docs.getOrd(1), equalTo(1)); - ref = docs.getOrds(1); - assertThat(ref.offset, equalTo(0)); - for (int i = 0; i < 15; i++) { - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertThat(ref.length, equalTo(15)); - - // Document 3 - assertThat(docs.getOrd(2), equalTo(1)); - ref = docs.getOrds(2); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(1)); - assertThat(ref.length, equalTo(1)); - - // Document 4 - assertThat(docs.getOrd(3), equalTo(1)); - ref = docs.getOrds(3); - assertThat(ref.offset, equalTo(0)); - for (int i = 0; i < 5; i++) { - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertThat(ref.length, equalTo(5)); - - // Document 5 - assertThat(docs.getOrd(4), equalTo(1)); - ref = docs.getOrds(4); - assertThat(ref.offset, equalTo(0)); - for (int i = 0; i < 6; i++) { - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertThat(ref.length, equalTo(6)); - - // Document 6 - assertThat(docs.getOrd(5), equalTo(2)); - ref = docs.getOrds(5); - assertThat(ref.offset, equalTo(0)); - assertThat(ref.ints[0], equalTo(2)); - assertThat(ref.length, equalTo(1)); - - // Document 7 - assertThat(docs.getOrd(6), equalTo(1)); - ref = docs.getOrds(6); - assertThat(ref.offset, equalTo(0)); - for (int i = 0; i < 10; i++) { - assertThat(ref.ints[i], equalTo(i + 1)); - } - assertThat(ref.length, equalTo(10)); - } - -}