Make field data able to support more than 2B ordinals per segment.
Although segments are limited to 2B documents, there is no limit on the number of unique values that a segment may store. This commit replaces 'int' with 'long' every time a number is used to represent an ordinal, and modifies the data structures used to store ordinals so that they can actually support more than 2B ordinals per segment.

It also improves the memory usage of the multi-ordinals data structures, and the transient memory required to build them (OrdinalsBuilder), by using Lucene's PackedInts data structures. As a result, loading the ordinals mapping from disk may be a little slower; field-data-based features such as faceting may be slightly slower or faster, depending on whether being nicer to the CPU caches balances the overhead of the additional abstraction; and memory usage should be better in all cases, especially when the size of the ordinals mapping is not negligible compared to the size of the values (numeric data, for example).

Closes #3189
Parent: 4d05c9cfd5
Commit: 12d9268db2
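
Concretely, after this change every ordinal is a long, and ordinal 0 keeps its meaning of "no value for this document" (see the BytesValues diff below). A minimal caller sketch against the widened Ordinals.Docs API; the helper itself is hypothetical, not part of the commit:

    // Hypothetical helper illustrating the new long-based ordinal contract.
    long countDocsWithValue(Ordinals.Docs ords, int maxDoc) {
        long count = 0;
        for (int doc = 0; doc < maxDoc; doc++) {
            final long ord = ords.getOrd(doc); // returned an int before this commit
            if (ord != 0) {                    // ord 0 means the doc has no value
                count++;
            }
        }
        return count;
    }
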
@@ -0,0 +1,171 @@ (new file: XAbstractPagedMutable in org.apache.lucene.util.packed)
package org.apache.lucene.util.packed;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import static org.apache.lucene.util.packed.XPackedInts.checkBlockSize;
import static org.apache.lucene.util.packed.XPackedInts.numBlocks;

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;

/**
 * Base implementation for {@link XPagedMutable} and {@link PagedGrowableWriter}.
 * @lucene.internal
 */
abstract class XAbstractPagedMutable<T extends XAbstractPagedMutable<T>> {

  static {
    // LUCENE MONITOR: this should be in Lucene 4.4, copied from Revision: 1492640.
    assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
  }

  static final int MIN_BLOCK_SIZE = 1 << 6;
  static final int MAX_BLOCK_SIZE = 1 << 30;

  final long size;
  final int pageShift;
  final int pageMask;
  final PackedInts.Mutable[] subMutables;
  final int bitsPerValue;

  XAbstractPagedMutable(int bitsPerValue, long size, int pageSize) {
    this.bitsPerValue = bitsPerValue;
    this.size = size;
    pageShift = checkBlockSize(pageSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
    pageMask = pageSize - 1;
    final int numPages = numBlocks(size, pageSize);
    subMutables = new PackedInts.Mutable[numPages];
  }

  protected final void fillPages() {
    final int numPages = numBlocks(size, pageSize());
    for (int i = 0; i < numPages; ++i) {
      // do not allocate for more entries than necessary on the last page
      final int valueCount = i == numPages - 1 ? lastPageSize(size) : pageSize();
      subMutables[i] = newMutable(valueCount, bitsPerValue);
    }
  }

  protected abstract PackedInts.Mutable newMutable(int valueCount, int bitsPerValue);

  final int lastPageSize(long size) {
    final int sz = indexInPage(size);
    return sz == 0 ? pageSize() : sz;
  }

  final int pageSize() {
    return pageMask + 1;
  }

  /** The number of values. */
  public final long size() {
    return size;
  }

  final int pageIndex(long index) {
    return (int) (index >>> pageShift);
  }

  final int indexInPage(long index) {
    return (int) index & pageMask;
  }

  /** Get value at <code>index</code>. */
  public final long get(long index) {
    assert index >= 0 && index < size;
    final int pageIndex = pageIndex(index);
    final int indexInPage = indexInPage(index);
    return subMutables[pageIndex].get(indexInPage);
  }

  /** Set value at <code>index</code>. */
  public final void set(long index, long value) {
    assert index >= 0 && index < size;
    final int pageIndex = pageIndex(index);
    final int indexInPage = indexInPage(index);
    subMutables[pageIndex].set(indexInPage, value);
  }

  protected long baseRamBytesUsed() {
    return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
        + RamUsageEstimator.NUM_BYTES_OBJECT_REF
        + RamUsageEstimator.NUM_BYTES_LONG
        + 3 * RamUsageEstimator.NUM_BYTES_INT;
  }

  /** Return the number of bytes used by this object. */
  public long ramBytesUsed() {
    long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed());
    bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length);
    for (PackedInts.Mutable gw : subMutables) {
      bytesUsed += gw.ramBytesUsed();
    }
    return bytesUsed;
  }

  protected abstract T newUnfilledCopy(long newSize);

  /** Create a new copy of size <code>newSize</code> based on the content of
   *  this buffer. This method is much more efficient than creating a new
   *  instance and copying values one by one. */
  public final T resize(long newSize) {
    final T copy = newUnfilledCopy(newSize);
    final int numCommonPages = Math.min(copy.subMutables.length, subMutables.length);
    final long[] copyBuffer = new long[1024];
    for (int i = 0; i < copy.subMutables.length; ++i) {
      final int valueCount = i == copy.subMutables.length - 1 ? lastPageSize(newSize) : pageSize();
      final int bpv = i < numCommonPages ? subMutables[i].getBitsPerValue() : this.bitsPerValue;
      copy.subMutables[i] = newMutable(valueCount, bpv);
      if (i < numCommonPages) {
        final int copyLength = Math.min(valueCount, subMutables[i].size());
        XPackedInts.copy(subMutables[i], 0, copy.subMutables[i], 0, copyLength, copyBuffer);
      }
    }
    return copy;
  }

  /** Similar to {@link ArrayUtil#grow(long[], int)}. */
  public final T grow(long minSize) {
    assert minSize >= 0;
    if (minSize <= size()) {
      @SuppressWarnings("unchecked")
      final T result = (T) this;
      return result;
    }
    long extra = minSize >>> 3;
    if (extra < 3) {
      extra = 3;
    }
    final long newSize = minSize + extra;
    return resize(newSize);
  }

  /** Similar to {@link ArrayUtil#grow(long[])}. */
  public final T grow() {
    return grow(size() + 1);
  }

  @Override
  public final String toString() {
    return getClass().getSimpleName() + "(size=" + size() + ",pageSize=" + pageSize() + ")";
  }

}

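The addressing above splits a long index into an int page number and an int offset. A worked example with an assumed page size of 1024 (so pageShift = 10 and pageMask = 1023):

    long index = 5_000_000_000L;           // does not fit in an int
    int pageIndex = (int) (index >>> 10);  // 4_882_812: which page
    int indexInPage = (int) index & 1023;  // 512: slot within that page
    // 4_882_812 * 1024 + 512 == 5_000_000_000; the page count itself stays
    // an int because numBlocks(size, pageSize) rejects sizes that would not.
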
@@ -0,0 +1,162 @@ (new file: XGrowableWriter in org.apache.lucene.util.packed)
package org.apache.lucene.util.packed;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;

/**
 * Implements {@link PackedInts.Mutable}, but grows the
 * bit count of the underlying packed ints on-demand.
 * <p>Beware that this class will accept to set negative values but in order
 * to do this, it will grow the number of bits per value to 64.
 * <p>@lucene.internal</p>
 */
public class XGrowableWriter implements PackedInts.Mutable {

  static {
    // LUCENE MONITOR: this should be in Lucene 4.4, copied from Revision: 1492640.
    assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
  }

  private long currentMask;
  private PackedInts.Mutable current;
  private final float acceptableOverheadRatio;

  /**
   * @param startBitsPerValue       the initial number of bits per value, may grow depending on the data
   * @param valueCount              the number of values
   * @param acceptableOverheadRatio an acceptable overhead ratio
   */
  public XGrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) {
    this.acceptableOverheadRatio = acceptableOverheadRatio;
    current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);
    currentMask = mask(current.getBitsPerValue());
  }

  private static long mask(int bitsPerValue) {
    return bitsPerValue == 64 ? ~0L : PackedInts.maxValue(bitsPerValue);
  }

  @Override
  public long get(int index) {
    return current.get(index);
  }

  @Override
  public int size() {
    return current.size();
  }

  @Override
  public int getBitsPerValue() {
    return current.getBitsPerValue();
  }

  public PackedInts.Mutable getMutable() {
    return current;
  }

  @Override
  public Object getArray() {
    return current.getArray();
  }

  @Override
  public boolean hasArray() {
    return current.hasArray();
  }

  private void ensureCapacity(long value) {
    if ((value & currentMask) == value) {
      return;
    }
    final int bitsRequired = value < 0 ? 64 : PackedInts.bitsRequired(value);
    assert bitsRequired > current.getBitsPerValue();
    final int valueCount = size();
    PackedInts.Mutable next = PackedInts.getMutable(valueCount, bitsRequired, acceptableOverheadRatio);
    PackedInts.copy(current, 0, next, 0, valueCount, PackedInts.DEFAULT_BUFFER_SIZE);
    current = next;
    currentMask = mask(current.getBitsPerValue());
  }

  @Override
  public void set(int index, long value) {
    ensureCapacity(value);
    current.set(index, value);
  }

  @Override
  public void clear() {
    current.clear();
  }

  public XGrowableWriter resize(int newSize) {
    XGrowableWriter next = new XGrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio);
    final int limit = Math.min(size(), newSize);
    PackedInts.copy(current, 0, next, 0, limit, PackedInts.DEFAULT_BUFFER_SIZE);
    return next;
  }

  @Override
  public int get(int index, long[] arr, int off, int len) {
    return current.get(index, arr, off, len);
  }

  @Override
  public int set(int index, long[] arr, int off, int len) {
    long max = 0;
    for (int i = off, end = off + len; i < end; ++i) {
      // bitwise or is nice because either all values are positive and the
      // or-ed result will require as many bits per value as the max of the
      // values, or one of them is negative and the result will be negative,
      // forcing GrowableWriter to use 64 bits per value
      max |= arr[i];
    }
    ensureCapacity(max);
    return current.set(index, arr, off, len);
  }

  @Override
  public void fill(int fromIndex, int toIndex, long val) {
    ensureCapacity(val);
    current.fill(fromIndex, toIndex, val);
  }

  @Override
  public long ramBytesUsed() {
    return RamUsageEstimator.alignObjectSize(
        RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
        + RamUsageEstimator.NUM_BYTES_OBJECT_REF
        + RamUsageEstimator.NUM_BYTES_LONG
        + RamUsageEstimator.NUM_BYTES_FLOAT)
        + current.ramBytesUsed();
  }

  @Override
  public void save(DataOutput out) throws IOException {
    current.save(out);
  }

}

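A short sketch of the on-demand widening (assumed caller code; the constants are illustrative):

    XGrowableWriter w = new XGrowableWriter(2, 100, PackedInts.DEFAULT); // 2 bits, 100 values
    w.set(0, 3);    // fits in 2 bits, no reallocation
    w.set(1, 1000); // needs 10 bits: ensureCapacity() copies into a wider Mutable
    w.set(2, -1);   // negative value fails the mask check, forcing 64 bits per value
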
@@ -0,0 +1,88 @@ (new file: XPackedInts in org.apache.lucene.util.packed)
package org.apache.lucene.util.packed;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.Version;
import org.apache.lucene.util.packed.PackedInts.Mutable;
import org.apache.lucene.util.packed.PackedInts.Reader;
import org.elasticsearch.common.lucene.Lucene;

/**
 * Simplistic compression for array of unsigned long values.
 * Each value is >= 0 and <= a specified maximum value. The
 * values are stored as packed ints, with each value
 * consuming a fixed number of bits.
 *
 * @lucene.internal
 */
public class XPackedInts {

  static {
    // LUCENE MONITOR: this should be in Lucene 4.4, copied from Revision: 1492640.
    assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
  }

  /** Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer. */
  static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) {
    assert buf.length > 0;
    int remaining = 0;
    while (len > 0) {
      final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining));
      assert read > 0;
      srcPos += read;
      len -= read;
      remaining += read;
      final int written = dest.set(destPos, buf, 0, remaining);
      assert written > 0;
      destPos += written;
      if (written < remaining) {
        System.arraycopy(buf, written, buf, 0, remaining - written);
      }
      remaining -= written;
    }
    while (remaining > 0) {
      final int written = dest.set(destPos, buf, 0, remaining);
      destPos += written;
      remaining -= written;
      System.arraycopy(buf, written, buf, 0, remaining);
    }
  }

  /** Check that the block size is a power of 2, in the right bounds, and return
   *  its log in base 2. */
  static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) {
    if (blockSize < minBlockSize || blockSize > maxBlockSize) {
      throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize);
    }
    if ((blockSize & (blockSize - 1)) != 0) {
      throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
    }
    return Integer.numberOfTrailingZeros(blockSize);
  }

  /** Return the number of blocks required to store <code>size</code> values on
   *  <code>blockSize</code>. */
  static int numBlocks(long size, int blockSize) {
    final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1);
    if ((long) numBlocks * blockSize < size) {
      throw new IllegalArgumentException("size is too large for this block size");
    }
    return numBlocks;
  }

}

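A worked example of numBlocks with assumed inputs: 5,000,000,000 values in blocks of 1024 give 4,882,812 full blocks plus a remainder of 512, hence 4,882,813 blocks. If the int cast wrapped, the overflow check would throw instead of silently under-allocating:

    // Callable from the same package (assumed inputs):
    int blocks = XPackedInts.numBlocks(5_000_000_000L, 1024); // 4_882_813
    // 5_000_000_000 / 1024 = 4_882_812 full blocks, remainder 512
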
@@ -0,0 +1,79 @@ (new file: XPagedGrowableWriter in org.apache.lucene.util.packed)
package org.apache.lucene.util.packed;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.packed.PackedInts.Mutable;
import org.elasticsearch.common.lucene.Lucene;

/**
 * A {@link XPagedGrowableWriter}. This class slices data into fixed-size blocks
 * which have independent numbers of bits per value and grow on-demand.
 * <p>You should use this class instead of {@link AppendingLongBuffer} only when
 * you need random write-access. Otherwise this class will likely be slower and
 * less memory-efficient.
 * @lucene.internal
 */
public final class XPagedGrowableWriter extends XAbstractPagedMutable<XPagedGrowableWriter> {

  static {
    // LUCENE MONITOR: this should be in Lucene 4.4, copied from Revision: 1492640.
    assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
  }

  final float acceptableOverheadRatio;

  /**
   * Create a new {@link XPagedGrowableWriter} instance.
   *
   * @param size                    the number of values to store.
   * @param pageSize                the number of values per page
   * @param startBitsPerValue       the initial number of bits per value
   * @param acceptableOverheadRatio an acceptable overhead ratio
   */
  public XPagedGrowableWriter(long size, int pageSize,
      int startBitsPerValue, float acceptableOverheadRatio) {
    this(size, pageSize, startBitsPerValue, acceptableOverheadRatio, true);
  }

  XPagedGrowableWriter(long size, int pageSize, int startBitsPerValue, float acceptableOverheadRatio, boolean fillPages) {
    super(startBitsPerValue, size, pageSize);
    this.acceptableOverheadRatio = acceptableOverheadRatio;
    if (fillPages) {
      fillPages();
    }
  }

  @Override
  protected Mutable newMutable(int valueCount, int bitsPerValue) {
    return new XGrowableWriter(bitsPerValue, valueCount, acceptableOverheadRatio);
  }

  @Override
  protected XPagedGrowableWriter newUnfilledCopy(long newSize) {
    return new XPagedGrowableWriter(newSize, pageSize(), bitsPerValue, acceptableOverheadRatio, false);
  }

  @Override
  protected long baseRamBytesUsed() {
    return super.baseRamBytesUsed() + RamUsageEstimator.NUM_BYTES_FLOAT;
  }

}

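A usage sketch (assumed caller code; sizes and the value pattern are only illustrative of the long-indexed API):

    long numValues = 3_000_000_000L;  // more than Integer.MAX_VALUE entries
    XPagedGrowableWriter writer =
            new XPagedGrowableWriter(numValues, 1 << 20, 1, PackedInts.DEFAULT);
    for (long i = 0; i < numValues; i++) {
        writer.set(i, i % 4);          // each page widens its bits per value on demand
    }
    long v = writer.get(1_234_567_890L); // long-indexed random access
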
@@ -0,0 +1,68 @@ (new file: AbstractBigArray in org.elasticsearch.common.util)
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.util;

import com.google.common.base.Preconditions;

/** Common implementation for array lists that slice data into fixed-size blocks. */
abstract class AbstractBigArray {

    private final int pageShift;
    private final int pageMask;
    protected long size;

    protected AbstractBigArray(int pageSize) {
        Preconditions.checkArgument(pageSize >= 128, "pageSize must be >= 128");
        Preconditions.checkArgument((pageSize & (pageSize - 1)) == 0, "pageSize must be a power of two");
        this.pageShift = Integer.numberOfTrailingZeros(pageSize);
        this.pageMask = pageSize - 1;
        size = 0;
    }

    final int numPages(long capacity) {
        final long numPages = (capacity + pageMask) >>> pageShift;
        Preconditions.checkArgument(numPages <= Integer.MAX_VALUE, "pageSize=" + (pageMask + 1) + " is too small for such a capacity: " + capacity);
        return (int) numPages;
    }

    final int pageSize() {
        return pageMask + 1;
    }

    final int pageIndex(long index) {
        return (int) (index >>> pageShift);
    }

    final int indexInPage(long index) {
        return (int) (index & pageMask);
    }

    public final long size() {
        return size;
    }

    protected abstract int numBytesPerElement();

    public final long sizeInBytes() {
        // rough approximation: we only take into account the size of the values, not the overhead of the array objects
        return ((long) pageIndex(size - 1) + 1) * pageSize() * numBytesPerElement();
    }

}

@@ -0,0 +1,75 @@ (new file: BigDoubleArrayList in org.elasticsearch.common.util)
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.util;

import org.apache.lucene.util.ArrayUtil;
import org.elasticsearch.common.RamUsage;

import java.util.Arrays;

/** Double array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of
 *  configurable length. */
public final class BigDoubleArrayList extends AbstractBigArray {

    /** Default page size, 16KB of memory per page. */
    private static final int DEFAULT_PAGE_SIZE = 1 << 11;

    private double[][] pages;

    public BigDoubleArrayList(int pageSize, long initialCapacity) {
        super(pageSize);
        pages = new double[numPages(initialCapacity)][];
    }

    public BigDoubleArrayList(long initialCapacity) {
        this(DEFAULT_PAGE_SIZE, initialCapacity);
    }

    public BigDoubleArrayList() {
        this(1024);
    }

    public double get(long index) {
        assert index >= 0 && index < size;
        final int pageIndex = pageIndex(index);
        final int indexInPage = indexInPage(index);
        return pages[pageIndex][indexInPage];
    }

    public void add(double d) {
        final int pageIndex = pageIndex(size);
        if (pageIndex >= pages.length) {
            final int newLength = ArrayUtil.oversize(pageIndex + 1, numBytesPerElement());
            pages = Arrays.copyOf(pages, newLength);
        }
        if (pages[pageIndex] == null) {
            pages[pageIndex] = new double[pageSize()];
        }
        final int indexInPage = indexInPage(size);
        pages[pageIndex][indexInPage] = d;
        ++size;
    }

    @Override
    protected int numBytesPerElement() {
        return RamUsage.NUM_BYTES_DOUBLE;
    }

}

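Usage sketch (assumed caller code; the sizes are illustrative of the long-indexed, append-only API rather than a realistic workload):

    BigDoubleArrayList list = new BigDoubleArrayList();
    for (long i = 0; i < 3_000_000_000L; i++) {
        list.add(i / 7.0);             // 16KB pages are allocated lazily while appending
    }
    double d = list.get(2_999_999_999L); // index beyond Integer.MAX_VALUE
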
@@ -0,0 +1,70 @@ (new file: BigFloatArrayList in org.elasticsearch.common.util)
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.util;

import org.apache.lucene.util.ArrayUtil;
import org.elasticsearch.common.RamUsage;

/** Float array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of
 *  configurable length. */
public final class BigFloatArrayList extends AbstractBigArray {

    /** Default page size, 16KB of memory per page. */
    private static final int DEFAULT_PAGE_SIZE = 1 << 12;

    private float[][] pages;

    public BigFloatArrayList(int pageSize, long initialCapacity) {
        super(pageSize);
        pages = new float[numPages(initialCapacity)][];
    }

    public BigFloatArrayList(long initialCapacity) {
        this(DEFAULT_PAGE_SIZE, initialCapacity);
    }

    public BigFloatArrayList() {
        this(1024);
    }

    public float get(long index) {
        assert index >= 0 && index < size;
        final int pageIndex = pageIndex(index);
        final int indexInPage = indexInPage(index);
        return pages[pageIndex][indexInPage];
    }

    public void add(float f) {
        final int pageIndex = pageIndex(size);
        pages = ArrayUtil.grow(pages, pageIndex + 1);
        if (pages[pageIndex] == null) {
            pages[pageIndex] = new float[pageSize()];
        }
        final int indexInPage = indexInPage(size);
        pages[pageIndex][indexInPage] = f;
        ++size;
    }

    @Override
    protected int numBytesPerElement() {
        return RamUsage.NUM_BYTES_FLOAT;
    }

}

@@ -0,0 +1,69 @@ (new file: BigIntArray in org.elasticsearch.common.util)
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.util;

import org.elasticsearch.common.RamUsage;

/** Int array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of
 *  configurable length. */
public final class BigIntArray extends AbstractBigArray implements IntArray {

    /** Default page size, 16KB of memory per page. */
    public static final int DEFAULT_PAGE_SIZE = 1 << 12;

    private int[][] pages;

    public BigIntArray(int pageSize, long size) {
        super(pageSize);
        this.size = size;
        pages = new int[numPages(size)][];
        for (int i = 0; i < pages.length; ++i) {
            pages[i] = new int[pageSize()];
        }
    }

    public BigIntArray(long size) {
        this(DEFAULT_PAGE_SIZE, size);
    }

    public int get(long index) {
        final int pageIndex = pageIndex(index);
        final int indexInPage = indexInPage(index);
        return pages[pageIndex][indexInPage];
    }

    public void set(long index, int value) {
        final int pageIndex = pageIndex(index);
        final int indexInPage = indexInPage(index);
        pages[pageIndex][indexInPage] = value;
    }

    public int increment(long index, int inc) {
        final int pageIndex = pageIndex(index);
        final int indexInPage = indexInPage(index);
        return pages[pageIndex][indexInPage] += inc;
    }

    @Override
    protected int numBytesPerElement() {
        return RamUsage.NUM_BYTES_INT;
    }

}

@@ -0,0 +1,34 @@ (new file: IntArray in org.elasticsearch.common.util)
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.util;

/** Abstraction of an array of integer values. */
public interface IntArray {

    /** Get an element given its index. */
    public abstract int get(long index);

    /** Set a value at the given index. */
    public abstract void set(long index, int value);

    /** Increment value at the given index by <code>inc</code> and return the value. */
    public abstract int increment(long index, int inc);

}

@@ -0,0 +1,66 @@ (new file: IntArrays in org.elasticsearch.common.util)
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.util;

/** Utility methods to work with {@link IntArray}s. */
public class IntArrays {

    private IntArrays() {}

    /** Return a {@link IntArray} view over the provided array. */
    public static IntArray wrap(final int[] array) {
        return new IntArray() {

            private void checkIndex(long index) {
                if (index > Integer.MAX_VALUE) {
                    throw new IndexOutOfBoundsException(Long.toString(index));
                }
            }

            @Override
            public void set(long index, int value) {
                checkIndex(index);
                array[(int) index] = value;
            }

            @Override
            public int increment(long index, int inc) {
                checkIndex(index);
                return array[(int) index] += inc;
            }

            @Override
            public int get(long index) {
                checkIndex(index);
                return array[(int) index];
            }

        };
    }

    /** Return a newly allocated {@link IntArray} of the given length or more. */
    public static IntArray allocate(long length) {
        if (length <= BigIntArray.DEFAULT_PAGE_SIZE) {
            return wrap(new int[(int) length]);
        } else {
            return new BigIntArray(length);
        }
    }

}

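The wrap/allocate split keeps small arrays as plain int[]s while large ones transparently page. A sketch with assumed sizes:

    IntArray small = IntArrays.allocate(1000);          // wraps a plain int[1000]
    IntArray big = IntArrays.allocate(3_000_000_000L);  // paged BigIntArray
    big.set(2_500_000_000L, 42);
    int v = big.increment(2_500_000_000L, 1);           // returns 43
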
Modified: BytesValues

@@ -120,10 +120,10 @@ public abstract class BytesValues {
     public static class Single implements Iter {
 
         protected BytesRef value;
-        protected int ord;
+        protected long ord;
         protected boolean done;
 
-        public Single reset(BytesRef value, int ord) {
+        public Single reset(BytesRef value, long ord) {
             this.value = value;
             this.ord = ord;
             this.done = false;
@@ -149,8 +149,8 @@ public abstract class BytesValues {
 
     static class Multi implements Iter {
 
-        protected int innerOrd;
-        protected int ord;
+        protected long innerOrd;
+        protected long ord;
         protected BytesValues.WithOrdinals withOrds;
         protected Ordinals.Docs.Iter ordsIter;
         protected final BytesRef scratch = new BytesRef();
@@ -226,7 +226,7 @@ public abstract class BytesValues {
         return ordinals;
     }
 
-    public BytesRef getValueByOrd(int ord) {
+    public BytesRef getValueByOrd(long ord) {
         return getValueScratchByOrd(ord, scratch);
     }
 
@@ -247,7 +247,7 @@ public abstract class BytesValues {
 
     @Override
     public BytesRef getValue(int docId) {
-        final int ord = ordinals.getOrd(docId);
+        final long ord = ordinals.getOrd(docId);
         if (ord == 0) {
             return null;
         }
@@ -268,7 +268,7 @@ public abstract class BytesValues {
      * result which will also be returned. If there is no value for this docId, the length will be 0.
      * Note, the bytes are not "safe".
      */
-    public abstract BytesRef getValueScratchByOrd(int ord, BytesRef ret);
+    public abstract BytesRef getValueScratchByOrd(long ord, BytesRef ret);
 
     public static class Empty extends WithOrdinals {
 
@@ -277,7 +277,7 @@ public abstract class BytesValues {
     }
 
     @Override
-    public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
+    public BytesRef getValueScratchByOrd(long ord, BytesRef ret) {
         ret.length = 0;
         return ret;
     }

Modified: DoubleValues

@@ -120,7 +120,7 @@ public abstract class DoubleValues {
 
     @Override
     public final double getValueMissing(int docId, double missingValue) {
-        final int ord = ordinals.getOrd(docId);
+        final long ord = ordinals.getOrd(docId);
         if (ord == 0) {
             return missingValue;
         } else {
@@ -128,7 +128,7 @@ public abstract class DoubleValues {
         }
     }
 
-    public abstract double getValueByOrd(int ord);
+    public abstract double getValueByOrd(long ord);
 
     @Override
     public final Iter getIter(int docId) {
@@ -184,8 +184,8 @@ public abstract class DoubleValues {
 
     static class Multi implements Iter {
 
-        private org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter ordsIter;
-        private int ord;
+        private Ordinals.Docs.Iter ordsIter;
+        private long ord;
         private WithOrdinals values;
 
         public Multi(WithOrdinals values) {

Modified: LongValues

@@ -118,7 +118,7 @@ public abstract class LongValues {
         return getValueByOrd(ordinals.getOrd(docId));
     }
 
-    public abstract long getValueByOrd(int ord);
+    public abstract long getValueByOrd(long ord);
 
     @Override
     public final Iter getIter(int docId) {
@@ -127,7 +127,7 @@ public abstract class LongValues {
 
     @Override
     public final long getValueMissing(int docId, long missingValue) {
-        final int ord = ordinals.getOrd(docId);
+        final long ord = ordinals.getOrd(docId);
         if (ord == 0) {
             return missingValue;
         } else {
@@ -185,7 +185,7 @@ public abstract class LongValues {
     static class Multi implements Iter {
 
         private org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter ordsIter;
-        private int ord;
+        private long ord;
         private WithOrdinals values;
 
         public Multi(WithOrdinals values) {

Modified: BytesRefOrdValComparator

@@ -45,7 +45,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
 
     /* Ords for each slot.
       @lucene.internal */
-    final int[] ords;
+    final long[] ords;
 
     final SortMode sortMode;
 
@@ -75,7 +75,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
     /* Bottom ord (same as ords[bottomSlot] once bottomSlot
       is set).  Cached for faster compares.
      @lucene.internal */
-    int bottomOrd;
+    long bottomOrd;
 
     /* True if current bottom slot matches the current
       reader.
@@ -92,7 +92,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
     public BytesRefOrdValComparator(IndexFieldData.WithOrdinals<?> indexFieldData, int numHits, SortMode sortMode) {
         this.indexFieldData = indexFieldData;
         this.sortMode = sortMode;
-        ords = new int[numHits];
+        ords = new long[numHits];
         values = new BytesRef[numHits];
         readerGen = new int[numHits];
     }
@@ -100,7 +100,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
     @Override
     public int compare(int slot1, int slot2) {
         if (readerGen[slot1] == readerGen[slot2]) {
-            return ords[slot1] - ords[slot2];
+            return LongValuesComparator.compare(ords[slot1], ords[slot2]);
         }
 
         final BytesRef val1 = values[slot1];
@@ -207,7 +207,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
             final int docOrd = (readerOrds[doc] & 0xFF);
             if (bottomSameReader) {
                 // ord is precisely comparable, even in the equal case
-                return bottomOrd - docOrd;
+                return (int) bottomOrd - docOrd;
             } else if (bottomOrd >= docOrd) {
                 // the equals case always means bottom is > doc
                 // (because we set bottomOrd to the lower bound in
@@ -253,7 +253,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
             final int docOrd = (readerOrds[doc] & 0xFFFF);
             if (bottomSameReader) {
                 // ord is precisely comparable, even in the equal case
-                return bottomOrd - docOrd;
+                return (int) bottomOrd - docOrd;
             } else if (bottomOrd >= docOrd) {
                 // the equals case always means bottom is > doc
                 // (because we set bottomOrd to the lower bound in
@@ -299,7 +299,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
             final int docOrd = readerOrds[doc];
             if (bottomSameReader) {
                 // ord is precisely comparable, even in the equal case
-                return bottomOrd - docOrd;
+                return (int) bottomOrd - docOrd;
             } else if (bottomOrd >= docOrd) {
                 // the equals case always means bottom is > doc
                 // (because we set bottomOrd to the lower bound in
@@ -345,10 +345,10 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
         @Override
         public int compareBottom(int doc) {
             assert bottomSlot != -1;
-            final int docOrd = readerOrds.getOrd(doc);
+            final long docOrd = readerOrds.getOrd(doc);
             if (bottomSameReader) {
                 // ord is precisely comparable, even in the equal case
-                return bottomOrd - docOrd;
+                return LongValuesComparator.compare(bottomOrd, docOrd);
             } else if (bottomOrd >= docOrd) {
                 // the equals case always means bottom is > doc
                 // (because we set bottomOrd to the lower bound in
@@ -361,7 +361,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
 
         @Override
         public void copy(int slot, int doc) {
-            final int ord = readerOrds.getOrd(doc);
+            final long ord = readerOrds.getOrd(doc);
             ords[slot] = ord;
             if (ord == 0) {
                 values[slot] = null;
@@ -428,7 +428,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
             bottomSameReader = true;
             readerGen[bottomSlot] = currentReaderGen;
         } else {
-            final int index = binarySearch(termsIndex, bottomValue);
+            final long index = binarySearch(termsIndex, bottomValue);
             if (index < 0) {
                 bottomOrd = -index - 2;
                 bottomSameReader = false;
@@ -448,15 +448,15 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
         return values[slot];
     }
 
-    final protected static int binarySearch(BytesValues.WithOrdinals a, BytesRef key) {
+    final protected static long binarySearch(BytesValues.WithOrdinals a, BytesRef key) {
         return binarySearch(a, key, 1, a.ordinals().getNumOrds());
     }
 
-    final protected static int binarySearch(BytesValues.WithOrdinals a, BytesRef key, int low, int high) {
+    final protected static long binarySearch(BytesValues.WithOrdinals a, BytesRef key, long low, long high) {
         assert a.getValueByOrd(high) == null | a.getValueByOrd(high) != null; // make sure we actually can get these values
         assert a.getValueByOrd(low) == null | a.getValueByOrd(low) != null;
         while (low <= high) {
-            int mid = (low + high) >>> 1;
+            long mid = (low + high) >>> 1;
             BytesRef midVal = a.getValueByOrd(mid);
             int cmp;
             if (midVal != null) {
@@ -488,10 +488,10 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
 
         @Override
         public int compareBottom(int doc) throws IOException {
-            final int docOrd = getRelevantOrd(readerOrds, doc, sortMode);
+            final long docOrd = getRelevantOrd(readerOrds, doc, sortMode);
             if (bottomSameReader) {
                 // ord is precisely comparable, even in the equal case
-                return bottomOrd - docOrd;
+                return LongValuesComparator.compare(bottomOrd, docOrd);
             } else if (bottomOrd >= docOrd) {
                 // the equals case always means bottom is > doc
                 // (because we set bottomOrd to the lower bound in
@@ -504,7 +504,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
 
         @Override
         public void copy(int slot, int doc) throws IOException {
-            final int ord = getRelevantOrd(readerOrds, doc, sortMode);
+            final long ord = getRelevantOrd(readerOrds, doc, sortMode);
             ords[slot] = ord;
             if (ord == 0) {
                 values[slot] = null;
@@ -561,14 +561,14 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
         return relevantVal;
     }
 
-    static int getRelevantOrd(Ordinals.Docs readerOrds, int docId, SortMode sortMode) {
+    static long getRelevantOrd(Ordinals.Docs readerOrds, int docId, SortMode sortMode) {
         Ordinals.Docs.Iter iter = readerOrds.getIter(docId);
-        int currentVal = iter.next();
+        long currentVal = iter.next();
         if (currentVal == 0) {
             return 0;
         }
 
-        int relevantVal = currentVal;
+        long relevantVal = currentVal;
         while (true) {
             if (sortMode == SortMode.MAX) {
                 if (currentVal > relevantVal) {

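The swap from ords[slot1] - ords[slot2] to LongValuesComparator.compare matters because subtracting longs and narrowing to int can silently flip the sign; LongValuesComparator.compare is assumed to provide the overflow-free three-way contract of Long.compare. An illustration with assumed values:

    long bottomOrd = 3_000_000_000L, docOrd = 0L;
    int broken = (int) (bottomOrd - docOrd);    // -1294967296: negative although bottomOrd > docOrd
    int safe = Long.compare(bottomOrd, docOrd); // 1, as intended
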
Modified: DocIdOrdinals

@@ -19,7 +19,7 @@
 
 package org.elasticsearch.index.fielddata.ordinals;
 
-import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LongsRef;
 import org.elasticsearch.common.RamUsage;
 
 /**
@@ -64,13 +64,13 @@ public class DocIdOrdinals implements Ordinals {
     }
 
     @Override
-    public int getNumOrds() {
+    public long getNumOrds() {
         return numDocs;
     }
 
     @Override
-    public int getMaxOrd() {
-        return numDocs + 1;
+    public long getMaxOrd() {
+        return 1L + numDocs;
     }
 
     @Override
@@ -81,7 +81,7 @@ public class DocIdOrdinals implements Ordinals {
     public static class Docs implements Ordinals.Docs {
 
         private final DocIdOrdinals parent;
-        private final IntsRef intsScratch = new IntsRef(new int[1], 0, 1);
+        private final LongsRef longsScratch = new LongsRef(new long[1], 0, 1);
         private final SingleValueIter iter = new SingleValueIter();
 
         public Docs(DocIdOrdinals parent) {
@@ -99,12 +99,12 @@ public class DocIdOrdinals implements Ordinals {
         }
 
         @Override
-        public int getNumOrds() {
+        public long getNumOrds() {
             return parent.getNumOrds();
         }
 
         @Override
-        public int getMaxOrd() {
+        public long getMaxOrd() {
             return parent.getMaxOrd();
         }
 
@@ -114,14 +114,14 @@ public class DocIdOrdinals implements Ordinals {
         }
 
         @Override
-        public int getOrd(int docId) {
+        public long getOrd(int docId) {
             return docId + 1;
         }
 
         @Override
-        public IntsRef getOrds(int docId) {
-            intsScratch.ints[0] = docId + 1;
-            return intsScratch;
+        public LongsRef getOrds(int docId) {
+            longsScratch.longs[0] = docId + 1;
+            return longsScratch;
         }
 
         @Override

EmptyOrdinals.java

@@ -19,7 +19,7 @@

 package org.elasticsearch.index.fielddata.ordinals;

-import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LongsRef;

 /**
 */
@@ -57,12 +57,12 @@ public class EmptyOrdinals implements Ordinals {
     }

     @Override
-    public int getNumOrds() {
+    public long getNumOrds() {
         return 0;
     }

     @Override
-    public int getMaxOrd() {
+    public long getMaxOrd() {
         return 1;
     }

@@ -74,7 +74,7 @@ public class EmptyOrdinals implements Ordinals {
     public static class Docs implements Ordinals.Docs {

         private final EmptyOrdinals parent;
-        public static final IntsRef EMPTY_INTS_REF = new IntsRef();
+        public static final LongsRef EMPTY_LONGS_REF = new LongsRef();

         public Docs(EmptyOrdinals parent) {
             this.parent = parent;
@@ -91,12 +91,12 @@ public class EmptyOrdinals implements Ordinals {
         }

         @Override
-        public int getNumOrds() {
+        public long getNumOrds() {
             return 0;
         }

         @Override
-        public int getMaxOrd() {
+        public long getMaxOrd() {
             return 1;
         }

@@ -106,13 +106,13 @@ public class EmptyOrdinals implements Ordinals {
         }

         @Override
-        public int getOrd(int docId) {
+        public long getOrd(int docId) {
             return 0;
         }

         @Override
-        public IntsRef getOrds(int docId) {
-            return EMPTY_INTS_REF;
+        public LongsRef getOrds(int docId) {
+            return EMPTY_LONGS_REF;
         }

         @Override
MultiFlatArrayOrdinals.java (deleted)

@@ -1,189 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.ordinals;
-
-import org.apache.lucene.util.IntsRef;
-import org.elasticsearch.common.RamUsage;
-
-/**
- * "Flat" multi valued ordinals, the first level array size is as the maximum
- * values a docId has. Ordinals are populated in order from the first flat array
- * value to the next.
- */
-public final class MultiFlatArrayOrdinals implements Ordinals {
-
-    // ordinals with value 0 indicates no value
-    private final int[][] ordinals;
-    private final int numDocs;
-    private final int numOrds;
-    private final int maxOrd;
-
-    private long size = -1;
-
-    public MultiFlatArrayOrdinals(int[][] ordinals, int numOrds) {
-        assert ordinals.length > 0;
-        this.ordinals = ordinals;
-        this.numDocs = ordinals[0].length;
-        this.numOrds = numOrds;
-        this.maxOrd = numOrds + 1;
-    }
-
-    @Override
-    public boolean hasSingleArrayBackingStorage() {
-        return false;
-    }
-
-    @Override
-    public Object getBackingStorage() {
-        return ordinals;
-    }
-
-    @Override
-    public long getMemorySizeInBytes() {
-        if (size == -1) {
-            long size = 0;
-            size += RamUsage.NUM_BYTES_ARRAY_HEADER; // for the top level array
-            for (int[] ordinal : ordinals) {
-                size += RamUsage.NUM_BYTES_INT * ordinal.length + RamUsage.NUM_BYTES_ARRAY_HEADER;
-            }
-            this.size = size;
-        }
-        return size;
-    }
-
-    @Override
-    public boolean isMultiValued() {
-        return true;
-    }
-
-    @Override
-    public int getNumDocs() {
-        return numDocs;
-    }
-
-    @Override
-    public int getNumOrds() {
-        return numOrds;
-    }
-
-    @Override
-    public int getMaxOrd() {
-        return this.maxOrd;
-    }
-
-    @Override
-    public Docs ordinals() {
-        return new Docs(this, ordinals);
-    }
-
-    public static class Docs implements Ordinals.Docs {
-
-        private final MultiFlatArrayOrdinals parent;
-        private final int[][] ordinals;
-        private final IterImpl iter;
-
-        private final IntsRef intsScratch;
-
-        public Docs(MultiFlatArrayOrdinals parent, int[][] ordinals) {
-            this.parent = parent;
-            this.ordinals = ordinals;
-            this.iter = new IterImpl(ordinals);
-            this.intsScratch = new IntsRef(new int[16], 0 , 16);
-        }
-
-        @Override
-        public Ordinals ordinals() {
-            return this.parent;
-        }
-
-        @Override
-        public int getNumDocs() {
-            return parent.getNumDocs();
-        }
-
-        @Override
-        public int getNumOrds() {
-            return parent.getNumOrds();
-        }
-
-        @Override
-        public int getMaxOrd() {
-            return parent.getMaxOrd();
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return true;
-        }
-
-        @Override
-        public int getOrd(int docId) {
-            return ordinals[0][docId];
-        }
-
-        @Override
-        public IntsRef getOrds(int docId) {
-            intsScratch.offset = 0;
-            int i;
-            for (i = 0; i < ordinals.length; i++) {
-                int ordinal = ordinals[i][docId];
-                if (ordinal == 0) {
-                    if (i == 0) {
-                        intsScratch.length = 0;
-                        return intsScratch;
-                    }
-                    break;
-                }
-                intsScratch.grow(i+1);
-                intsScratch.ints[i] = ordinal;
-            }
-            intsScratch.length = i;
-            return intsScratch;
-        }
-
-        @Override
-        public Iter getIter(int docId) {
-            return iter.reset(docId);
-        }
-
-        public static class IterImpl implements Docs.Iter {
-
-            private final int[][] ordinals;
-            private int docId;
-            private int i;
-
-            public IterImpl(int[][] ordinals) {
-                this.ordinals = ordinals;
-            }
-
-            public IterImpl reset(int docId) {
-                this.docId = docId;
-                this.i = 0;
-                return this;
-            }
-
-            @Override
-            public int next() {
-                if (i >= ordinals.length) return 0;
-                return ordinals[i++][docId];
-            }
-        }
-    }
-}
MultiOrdinals.java (new file)

@@ -0,0 +1,219 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.fielddata.ordinals;
+
+import org.apache.lucene.util.Version;
+
+import org.elasticsearch.common.lucene.Lucene;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.packed.AppendingLongBuffer;
+import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.apache.lucene.util.packed.PackedInts;
+import org.elasticsearch.common.RamUsage;
+import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter;
+
+/** {@link Ordinals} implementation which is efficient at storing field data ordinals for multi-valued or sparse fields. */
+public class MultiOrdinals implements Ordinals {
+
+    // hard-coded in Lucene 4.3 but will be exposed in Lucene 4.4
+    static {
+        assert Lucene.VERSION == Version.LUCENE_43;
+    }
+    private static final int OFFSETS_PAGE_SIZE = 1024;
+
+    /** Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%. */
+    public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds) {
+        final int bitsPerOrd = PackedInts.bitsRequired(numOrds);
+        // Compute the worst-case number of bits per value for offsets in the worst case, eg. if no docs have a value at the
+        // beginning of the block and all docs have one at the end of the block
+        final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
+        final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
+        final int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
+        final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
+        final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd;
+        return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes;
+    }
+
+    private final boolean multiValued;
+    private final long numOrds;
+    private final MonotonicAppendingLongBuffer endOffsets;
+    private final AppendingLongBuffer ords;
+
+    public MultiOrdinals(OrdinalsBuilder builder) {
+        multiValued = builder.getNumMultiValuesDocs() > 0;
+        numOrds = builder.getNumOrds();
+        endOffsets = new MonotonicAppendingLongBuffer();
+        ords = new AppendingLongBuffer();
+        long lastEndOffset = 0;
+        for (int i = 0; i < builder.maxDoc(); ++i) {
+            final LongsRef docOrds = builder.docOrds(i);
+            final long endOffset = lastEndOffset + docOrds.length;
+            endOffsets.add(endOffset);
+            for (int j = 0; j < docOrds.length; ++j) {
+                ords.add(docOrds.longs[docOrds.offset + j] - 1);
+            }
+            lastEndOffset = endOffset;
+        }
+        assert endOffsets.size() == builder.maxDoc();
+        assert ords.size() == builder.getTotalNumOrds() : ords.size() + " != " + builder.getTotalNumOrds();
+    }
+
+    @Override
+    public boolean hasSingleArrayBackingStorage() {
+        return false;
+    }
+
+    @Override
+    public Object getBackingStorage() {
+        return null;
+    }
+
+    @Override
+    public long getMemorySizeInBytes() {
+        return endOffsets.ramBytesUsed() + ords.ramBytesUsed();
+    }
+
+    @Override
+    public boolean isMultiValued() {
+        return multiValued;
+    }
+
+    @Override
+    public int getNumDocs() {
+        return (int) endOffsets.size();
+    }
+
+    @Override
+    public long getNumOrds() {
+        return numOrds;
+    }
+
+    @Override
+    public long getMaxOrd() {
+        return numOrds + 1;
+    }
+
+    @Override
+    public Ordinals.Docs ordinals() {
+        return new MultiDocs(this);
+    }
+
+    static class MultiDocs implements Ordinals.Docs {
+
+        private final MultiOrdinals ordinals;
+        private final MonotonicAppendingLongBuffer endOffsets;
+        private final AppendingLongBuffer ords;
+        private final LongsRef longsScratch;
+        private final MultiIter iter;
+
+        MultiDocs(MultiOrdinals ordinals) {
+            this.ordinals = ordinals;
+            this.endOffsets = ordinals.endOffsets;
+            this.ords = ordinals.ords;
+            this.longsScratch = new LongsRef(16);
+            this.iter = new MultiIter(ords);
+        }
+
+        @Override
+        public Ordinals ordinals() {
+            return null;
+        }
+
+        @Override
+        public int getNumDocs() {
+            return ordinals.getNumDocs();
+        }
+
+        @Override
+        public long getNumOrds() {
+            return ordinals.getNumOrds();
+        }
+
+        @Override
+        public long getMaxOrd() {
+            return ordinals.getMaxOrd();
+        }
+
+        @Override
+        public boolean isMultiValued() {
+            return ordinals.isMultiValued();
+        }
+
+        @Override
+        public long getOrd(int docId) {
+            final long startOffset = docId > 0 ? endOffsets.get(docId - 1) : 0;
+            final long endOffset = endOffsets.get(docId);
+            if (startOffset == endOffset) {
+                return 0L; // ord for missing values
+            } else {
+                return 1L + ords.get(startOffset);
+            }
+        }
+
+        @Override
+        public LongsRef getOrds(int docId) {
+            final long startOffset = docId > 0 ? endOffsets.get(docId - 1) : 0;
+            final long endOffset = endOffsets.get(docId);
+            final int numValues = (int) (endOffset - startOffset);
+            if (longsScratch.length < numValues) {
+                longsScratch.longs = new long[ArrayUtil.oversize(numValues, RamUsage.NUM_BYTES_LONG)];
+            }
+            for (int i = 0; i < numValues; ++i) {
+                longsScratch.longs[i] = 1L + ords.get(startOffset + i);
+            }
+            longsScratch.offset = 0;
+            longsScratch.length = numValues;
+            return longsScratch;
+        }
+
+        @Override
+        public Iter getIter(int docId) {
+            final long startOffset = docId > 0 ? endOffsets.get(docId - 1) : 0;
+            final long endOffset = endOffsets.get(docId);
+            iter.offset = startOffset;
+            iter.endOffset = endOffset;
+            return iter;
+        }
+
+    }
+
+    static class MultiIter implements Iter {
+
+        final AppendingLongBuffer ordinals;
+        long offset, endOffset;
+
+        MultiIter(AppendingLongBuffer ordinals) {
+            this.ordinals = ordinals;
+        }
+
+        @Override
+        public long next() {
+            if (offset >= endOffset) {
+                return 0L;
+            } else {
+                return 1L + ordinals.get(offset++);
+            }
+        }
+
+    }
+
+}
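Note: the new MultiOrdinals above concatenates every document's ordinals into one flat append-only buffer and keeps a second, monotonic buffer of per-document end offsets, so a doc's ordinals are the window [endOffsets[doc-1], endOffsets[doc]). A toy model of that layout with plain arrays (a sketch only; the real class uses Lucene's packed buffers, and stored ords are 0-based):

    class MultiOrdinalsToy {
        // docs 0..2 with ordinals {1}, {}, {2,3}; stored values are ord - 1
        static final long[] ORDS = {0, 1, 2};        // flat buffer of all ordinals
        static final long[] END_OFFSETS = {1, 1, 3}; // monotonic per-doc end offsets

        static long getOrd(int docId) {
            final long start = docId > 0 ? END_OFFSETS[docId - 1] : 0;
            final long end = END_OFFSETS[docId];
            return start == end ? 0L : 1L + ORDS[(int) start]; // 0 = missing value
        }

        public static void main(String[] args) {
            System.out.println(getOrd(0)); // 1
            System.out.println(getOrd(1)); // 0 (doc 1 has no value)
            System.out.println(getOrd(2)); // 2 (first of {2, 3})
        }
    }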
Ordinals.java

@@ -19,7 +19,7 @@

 package org.elasticsearch.index.fielddata.ordinals;

-import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LongsRef;

 /**
  * A thread safe ordinals abstraction. Ordinals can only be positive integers.
@@ -54,13 +54,13 @@ public interface Ordinals {
     /**
      * The number of ordinals, excluding the "0" ordinal indicating a missing value.
      */
-    int getNumOrds();
+    long getNumOrds();

     /**
      * Returns total unique ord count; this includes +1 for
      * the null ord (always 0).
      */
-    int getMaxOrd();
+    long getMaxOrd();

     /**
      * Returns a lightweight (non thread safe) view iterator of the ordinals.
@@ -88,13 +88,13 @@ public interface Ordinals {
        /**
         * The number of ordinals, excluding the "0" ordinal (indicating a missing value).
         */
-       int getNumOrds();
+       long getNumOrds();

        /**
         * Returns total unique ord count; this includes +1 for
         * the null ord (always 0).
         */
-       int getMaxOrd();
+       long getMaxOrd();

        /**
         * Is one of the docs maps to more than one ordinal?
@@ -105,13 +105,13 @@ public interface Ordinals {
         * The ordinal that maps to the relevant docId. If it has no value, returns
         * <tt>0</tt>.
         */
-       int getOrd(int docId);
+       long getOrd(int docId);

        /**
         * Returns an array of ordinals matching the docIds, with 0 length one
         * for a doc with no ordinals.
         */
-       IntsRef getOrds(int docId);
+       LongsRef getOrds(int docId);

        /**
         * Returns an iterator of the ordinals that match the docId, with an
@@ -128,7 +128,7 @@ public interface Ordinals {
            /**
             * Gets the next ordinal. Returning 0 if the iteration is exhausted.
             */
-           int next();
+           long next();
        }

        static class EmptyIter implements Iter {
@@ -136,23 +136,23 @@ public interface Ordinals {
            public static EmptyIter INSTANCE = new EmptyIter();

            @Override
-           public int next() {
+           public long next() {
                return 0;
            }
        }

        static class SingleValueIter implements Iter {

-           private int value;
+           private long value;

-           public SingleValueIter reset(int value) {
+           public SingleValueIter reset(long value) {
                this.value = value;
                return this;
            }

            @Override
-           public int next() {
-               int actual = value;
+           public long next() {
+               long actual = value;
                value = 0;
                return actual;
            }
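With the interface widened to long, consumers iterate a document's ordinals exactly as before, just with long-typed locals. A usage sketch against the interface shown above, relying only on the documented contract that next() returns 0 once exhausted:

    // Sketch: count all ordinals of one doc through the widened Iter contract.
    // 0 doubles as the "missing value" and "end of iteration" sentinel.
    static long countOrds(Ordinals.Docs docs, int docId) {
        final Ordinals.Docs.Iter iter = docs.getIter(docId);
        long count = 0;
        for (long ord = iter.next(); ord != 0; ord = iter.next()) {
            count++;
        }
        return count;
    }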
OrdinalsBuilder.java

@@ -1,4 +1,3 @@
-package org.elasticsearch.index.fielddata.ordinals;
 /*
  * Licensed to ElasticSearch and Shay Banon under one
  * or more contributor license agreements. See the NOTICE file
@@ -17,21 +16,21 @@ package org.elasticsearch.index.fielddata.ordinals;
  * specific language governing permissions and limitations
  * under the License.
  */

+package org.elasticsearch.index.fielddata.ordinals;

 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FilteredTermsEnum;
-import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.*;
-import org.apache.lucene.util.IntBlockPool.Allocator;
-import org.apache.lucene.util.IntBlockPool.DirectAllocator;
 import org.apache.lucene.util.packed.GrowableWriter;
 import org.apache.lucene.util.packed.PackedInts;
-import org.elasticsearch.ElasticSearchIllegalArgumentException;
+import org.apache.lucene.util.packed.XPagedGrowableWriter;
 import org.elasticsearch.common.settings.Settings;

 import java.io.Closeable;
 import java.io.IOException;
-import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Comparator;

 /**
@@ -41,54 +40,251 @@ import java.util.Comparator;
  */
 public final class OrdinalsBuilder implements Closeable {

-    private final int maxDoc;
-    private int[] mvOrds;
-    private GrowableWriter svOrds;
+    /** Default acceptable overhead ratio. {@link OrdinalsBuilder} memory usage is mostly transient so it is likely a better trade-off to
+     * trade memory for speed in order to resize less often. */
+    public static final float DEFAULT_ACCEPTABLE_OVERHEAD_RATIO = PackedInts.FAST;

-    private int[] offsets;
-    private final IntBlockPool pool;
-    private final IntBlockPool.SliceWriter writer;
-    private final IntsRef intsRef = new IntsRef(1);
-    private final IntBlockPool.SliceReader reader;
-    private int currentOrd = 0;
+    /** The following structure is used to store ordinals. The idea is to store ords on levels of increasing sizes. Level 0 stores
+     * 1 value and 1 pointer to level 1. Level 1 stores 2 values and 1 pointer to level 2, ..., Level n stores 2**n values and
+     * 1 pointer to level n+1. If at some point an ordinal or a pointer has 0 as a value, this means that there are no remaining
+     * values. On the first level, ordinals.get(docId) is the first ordinal for docId or 0 if the document has no ordinals. On
+     * subsequent levels, the first 2^level slots are reserved and all have 0 as a value.
+     * <pre>
+     * Example for an index of 3 docs (O=ordinal, P = pointer)
+     * Level 0:
+     *   ordinals         [1] [4] [2]
+     *   nextLevelSlices   2   0   1
+     * Level 1:
+     *   ordinals         [0 0] [2 0] [3 4]
+     *   nextLevelSlices   0     0     1
+     * Level 2:
+     *   ordinals         [0 0 0 0] [5 0 0 0]
+     *   nextLevelSlices   0         0
+     * </pre>
+     * On level 0, all documents have an ordinal: 0 has 1, 1 has 4 and 2 has 2 as a first ordinal, this means that we need to read
+     * nextLevelEntries to get the index of their ordinals on the next level. The entry for document 1 is 0, meaning that we have
+     * already read all its ordinals. On the contrary 0 and 2 have more ordinals which are stored at indices 2 and 1. Let's continue
+     * with document 2: it has 2 more ordinals on level 1: 3 and 4 and its next level index is 1 meaning that there are remaining
+     * ordinals on the next level. On level 2 at index 1, we can read [5 0 0 0] meaning that 5 is an ordinal as well, but the
+     * fact that it is followed by zeros means that there are no more ordinals. In the end, document 2 has 2, 3, 4 and 5 as ordinals.
+     *
+     * In addition to these structures, there is another array which stores the current position (level + slice + offset in the slice)
+     * in order to be able to append data in constant time.
+     */
+    private static class OrdinalsStore {
+
+        private static final int PAGE_SIZE = 1 << 12;
+
+        /** Number of slots at <code>level</code> */
+        private static int numSlots(int level) {
+            return 1 << level;
+        }
+
+        private static int slotsMask(int level) {
+            return numSlots(level) - 1;
+        }
+
+        /** Encode the position for the given level and offset. The idea is to encode the level using unary coding in the lower bits and
+         * then the offset in the higher bits. */
+        private static long position(int level, long offset) {
+            assert level >= 1;
+            return (1 << (level - 1)) | (offset << level);
+        }
+
+        /** Decode the level from an encoded position. */
+        private static int level(long position) {
+            return 1 + Long.numberOfTrailingZeros(position);
+        }
+
+        /** Decode the offset from the position. */
+        private static long offset(long position, int level) {
+            return position >>> level;
+        }
+
+        /** Get the ID of the slice given an offset. */
+        private static long sliceID(int level, long offset) {
+            return offset >>> level;
+        }
+
+        /** Compute the first offset of the given slice. */
+        private static long startOffset(int level, long slice) {
+            return slice << level;
+        }
+
+        /** Compute the number of ordinals stored for a value given its current position. */
+        private static int numOrdinals(int level, long offset) {
+            return (1 << level) + (int) (offset & slotsMask(level));
+        }
+
+        // Current position
+        private XPagedGrowableWriter positions;
+        // First level (0) of ordinals and pointers to the next level
+        private final GrowableWriter firstOrdinals;
+        private XPagedGrowableWriter firstNextLevelSlices;
+        // Ordinals and pointers for other levels, starting at 1
+        private final XPagedGrowableWriter[] ordinals;
+        private final XPagedGrowableWriter[] nextLevelSlices;
+        private final int[] sizes;
+
+        private final int startBitsPerValue;
+        private final float acceptableOverheadRatio;
+
+        OrdinalsStore(int maxDoc, int startBitsPerValue, float acceptableOverheadRatio) {
+            this.startBitsPerValue = startBitsPerValue;
+            this.acceptableOverheadRatio = acceptableOverheadRatio;
+            positions = new XPagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
+            firstOrdinals = new GrowableWriter(startBitsPerValue, maxDoc, acceptableOverheadRatio);
+            // over allocate in order to never worry about the array sizes, 24 entries would allow to store several millions of ordinals per doc...
+            ordinals = new XPagedGrowableWriter[24];
+            nextLevelSlices = new XPagedGrowableWriter[24];
+            sizes = new int[24];
+            Arrays.fill(sizes, 1); // reserve the 1st slice on every level
+        }
+
+        /** Allocate a new slice and return its ID. */
+        private long newSlice(int level) {
+            final long newSlice = sizes[level]++;
+            // Lazily allocate ordinals
+            if (ordinals[level] == null) {
+                ordinals[level] = new XPagedGrowableWriter(8L * numSlots(level), PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
+            } else {
+                ordinals[level] = ordinals[level].grow(sizes[level] * numSlots(level));
+                if (nextLevelSlices[level] != null) {
+                    nextLevelSlices[level] = nextLevelSlices[level].grow(sizes[level]);
+                }
+            }
+            return newSlice;
+        }
+
+        public int addOrdinal(int docID, long ordinal) {
+            final long position = positions.get(docID);
+
+            if (position == 0L) { // on the first level
+                // 0 or 1 ordinal
+                if (firstOrdinals.get(docID) == 0L) {
+                    firstOrdinals.set(docID, ordinal);
+                    return 1;
+                } else {
+                    final long newSlice = newSlice(1);
+                    if (firstNextLevelSlices == null) {
+                        firstNextLevelSlices = new XPagedGrowableWriter(firstOrdinals.size(), PAGE_SIZE, 3, acceptableOverheadRatio);
+                    }
+                    firstNextLevelSlices.set(docID, newSlice);
+                    final long offset = startOffset(1, newSlice);
+                    ordinals[1].set(offset, ordinal);
+                    positions.set(docID, position(1, offset)); // current position is on the 1st level and not allocated yet
+                    return 2;
+                }
+            } else {
+                int level = level(position);
+                long offset = offset(position, level);
+                assert offset != 0L;
+                if (((offset + 1) & slotsMask(level)) == 0L) {
+                    // reached the end of the slice, allocate a new one on the next level
+                    final long newSlice = newSlice(level + 1);
+                    if (nextLevelSlices[level] == null) {
+                        nextLevelSlices[level] = new XPagedGrowableWriter(sizes[level], PAGE_SIZE, 1, acceptableOverheadRatio);
+                    }
+                    nextLevelSlices[level].set(sliceID(level, offset), newSlice);
+                    ++level;
+                    offset = startOffset(level, newSlice);
+                    assert (offset & slotsMask(level)) == 0L;
+                } else {
+                    // just go to the next slot
+                    ++offset;
+                }
+                ordinals[level].set(offset, ordinal);
+                final long newPosition = position(level, offset);
+                positions.set(docID, newPosition);
+                return numOrdinals(level, offset);
+            }
+        }
+
+        public void appendOrdinals(int docID, LongsRef ords) {
+            // First level
+            final long firstOrd = firstOrdinals.get(docID);
+            if (firstOrd == 0L) {
+                return;
+            }
+            ords.longs = ArrayUtil.grow(ords.longs, ords.offset + ords.length + 1);
+            ords.longs[ords.offset + ords.length++] = firstOrd;
+            if (firstNextLevelSlices == null) {
+                return;
+            }
+            long sliceID = firstNextLevelSlices.get(docID);
+            if (sliceID == 0L) {
+                return;
+            }
+            // Other levels
+            for (int level = 1; ; ++level) {
+                final int numSlots = numSlots(level);
+                ords.longs = ArrayUtil.grow(ords.longs, ords.offset + ords.length + numSlots);
+                final long offset = startOffset(level, sliceID);
+                for (int j = 0; j < numSlots; ++j) {
+                    final long ord = ordinals[level].get(offset + j);
+                    if (ord == 0L) {
+                        return;
+                    }
+                    ords.longs[ords.offset + ords.length++] = ord;
+                }
+                if (nextLevelSlices[level] == null) {
+                    return;
+                }
+                sliceID = nextLevelSlices[level].get(sliceID);
+                if (sliceID == 0L) {
+                    return;
+                }
+            }
+        }
+
+    }
+
+    private final int maxDoc;
+    private long currentOrd = 0;
     private int numDocsWithValue = 0;
     private int numMultiValuedDocs = 0;
     private int totalNumOrds = 0;

-    public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, Allocator allocator, float acceptableOverheadRatio) throws IOException {
+    private OrdinalsStore ordinals;
+    private final LongsRef spare;
+
+    public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException {
         this.maxDoc = maxDoc;
-        if (preDefineBitsRequired) {
-            int numTerms = (int) terms.size();
-            if (numTerms == -1) {
-                svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio);
-            } else {
-                svOrds = new GrowableWriter(PackedInts.bitsRequired(numTerms), maxDoc, acceptableOverheadRatio);
-            }
+        int startBitsPerValue = 8;
+        if (numTerms >= 0) {
+            startBitsPerValue = PackedInts.bitsRequired(numTerms);
         }
-        } else {
-            svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio);
-        }
+        ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio);
+        spare = new LongsRef();
     }
-        pool = new IntBlockPool(allocator);
-        reader = new IntBlockPool.SliceReader(pool);
-        writer = new IntBlockPool.SliceWriter(pool);
-    }
+
+    public OrdinalsBuilder(int maxDoc, float acceptableOverheadRatio) throws IOException {
+        this(-1, maxDoc, acceptableOverheadRatio);
+    }

     public OrdinalsBuilder(int maxDoc) throws IOException {
-        this(null, false, maxDoc, PackedInts.DEFAULT);
+        this(maxDoc, DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
     }

-    public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, float acceptableOverheadRatio) throws IOException {
-        this(terms, preDefineBitsRequired, maxDoc, new DirectAllocator(), acceptableOverheadRatio);
+    /**
+     * Returns a shared {@link LongsRef} instance for the given doc ID holding all ordinals associated with it.
+     */
+    public LongsRef docOrds(int docID) {
+        spare.offset = spare.length = 0;
+        ordinals.appendOrdinals(docID, spare);
+        return spare;
     }

-    public OrdinalsBuilder(Terms terms, int maxDoc, float acceptableOverheadRatio) throws IOException {
-        this(terms, true, maxDoc, new DirectAllocator(), acceptableOverheadRatio);
+    /** Return a {@link PackedInts.Reader} instance mapping every doc ID to its first ordinal if it exists and 0 otherwise. */
+    public PackedInts.Reader getFirstOrdinals() {
+        return ordinals.firstOrdinals;
     }

     /**
      * Advances the {@link OrdinalsBuilder} to the next ordinal and
      * return the current ordinal.
      */
-    public int nextOrdinal() {
+    public long nextOrdinal() {
         return ++currentOrd;
     }

@@ -96,7 +292,7 @@ public final class OrdinalsBuilder implements Closeable {
      * Retruns the current ordinal or <tt>0</tt> if this build has not been advanced via
      * {@link #nextOrdinal()}.
      */
-    public int currentOrdinal() {
+    public long currentOrdinal() {
         return currentOrd;
     }

@@ -105,42 +301,11 @@ public final class OrdinalsBuilder implements Closeable {
      */
     public OrdinalsBuilder addDoc(int doc) {
         totalNumOrds++;
-        if (svOrds != null) {
-            int docsOrd = (int) svOrds.get(doc);
-            if (docsOrd == 0) {
-                svOrds.set(doc, currentOrd);
-                numDocsWithValue++;
-            } else {
-                // Rebuilding ords that supports mv based on sv ords.
-                mvOrds = new int[maxDoc];
-                for (int docId = 0; docId < maxDoc; docId++) {
-                    mvOrds[docId] = (int) svOrds.get(docId);
-                }
-                svOrds = null;
-            }
-        }
-
-        if (mvOrds != null) {
-            int docsOrd = mvOrds[doc];
-            if (docsOrd == 0) {
-                mvOrds[doc] = currentOrd;
-                numDocsWithValue++;
-            } else if (docsOrd > 0) {
-                numMultiValuedDocs++;
-                int offset = writer.startNewSlice();
-                writer.writeInt(docsOrd);
-                writer.writeInt(currentOrd);
-                if (offsets == null) {
-                    offsets = new int[mvOrds.length];
-                }
-                offsets[doc] = writer.getCurrentOffset();
-                mvOrds[doc] = (-1 * offset) - 1;
-            } else {
-                assert offsets != null;
-                writer.reset(offsets[doc]);
-                writer.writeInt(currentOrd);
-                offsets[doc] = writer.getCurrentOffset();
-            }
+        final int numValues = ordinals.addOrdinal(doc, currentOrd);
+        if (numValues == 1) {
+            ++numDocsWithValue;
+        } else if (numValues == 2) {
+            ++numMultiValuedDocs;
         }
         return this;
     }
@@ -149,7 +314,7 @@ public final class OrdinalsBuilder implements Closeable {
      * Returns <code>true</code> iff this builder contains a document ID that is associated with more than one ordinal. Otherwise <code>false</code>;
      */
     public boolean isMultiValued() {
-        return offsets != null;
+        return numMultiValuedDocs > 0;
     }

     /**
@@ -183,7 +348,7 @@ public final class OrdinalsBuilder implements Closeable {
     /**
      * Returns the number of distinct ordinals in this builder.
      */
-    public int getNumOrds() {
+    public long getNumOrds() {
         return currentOrd;
     }

@@ -196,18 +361,9 @@ public final class OrdinalsBuilder implements Closeable {
             return null;
         }
         final FixedBitSet bitSet = new FixedBitSet(maxDoc);
-        if (svOrds != null) {
-            for (int docId = 0; docId < maxDoc; docId++) {
-                int ord = (int) svOrds.get(docId);
-                if (ord != 0) {
-                    bitSet.set(docId);
-                }
-            }
-        } else {
-            for (int docId = 0; docId < maxDoc; docId++) {
-                if (mvOrds[docId] != 0) {
-                    bitSet.set(docId);
-                }
+        for (int docID = 0; docID < maxDoc; ++docID) {
+            if (ordinals.firstOrdinals.get(docID) != 0) {
+                bitSet.set(docID);
             }
         }
         return bitSet;
@@ -217,72 +373,15 @@ public final class OrdinalsBuilder implements Closeable {
      * Builds an {@link Ordinals} instance from the builders current state.
      */
     public Ordinals build(Settings settings) {
-        if (numMultiValuedDocs == 0) {
-            return new SinglePackedOrdinals(svOrds.getMutable(), getNumOrds());
-        }
-        final String multiOrdinals = settings.get("multi_ordinals", "sparse");
-        if ("flat".equals(multiOrdinals)) {
-            final ArrayList<int[]> ordinalBuffer = new ArrayList<int[]>();
-            for (int i = 0; i < mvOrds.length; i++) {
-                final IntsRef docOrds = docOrds(i);
-                while (ordinalBuffer.size() < docOrds.length) {
-                    ordinalBuffer.add(new int[mvOrds.length]);
-                }
-
-                for (int j = docOrds.offset; j < docOrds.offset+docOrds.length; j++) {
-                    ordinalBuffer.get(j)[i] = docOrds.ints[j];
-                }
-            }
-            int[][] nativeOrdinals = new int[ordinalBuffer.size()][];
-            for (int i = 0; i < nativeOrdinals.length; i++) {
-                nativeOrdinals[i] = ordinalBuffer.get(i);
-            }
-            return new MultiFlatArrayOrdinals(nativeOrdinals, getNumOrds());
-        } else if ("sparse".equals(multiOrdinals)) {
-            int multiOrdinalsMaxDocs = settings.getAsInt("multi_ordinals_max_docs", 16777216 /* Equal to 64MB per storeage array */);
-            return new SparseMultiArrayOrdinals(this, multiOrdinalsMaxDocs);
+        final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.COMPACT);
+        if (numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getNumOrds())) {
+            // MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields
+            return new MultiOrdinals(this);
         } else {
-            throw new ElasticSearchIllegalArgumentException("no applicable fielddata multi_ordinals value, got [" + multiOrdinals + "]");
+            return new SinglePackedOrdinals(this, acceptableOverheadRatio);
         }
     }

-    /**
-     * Returns a shared {@link IntsRef} instance for the given doc ID holding all ordinals associated with it.
-     */
-    public IntsRef docOrds(int doc) {
-        if (svOrds != null) {
-            int docsOrd = (int) svOrds.get(doc);
-            intsRef.offset = 0;
-            if (docsOrd == 0) {
-                intsRef.length = 0;
-            } else if (docsOrd > 0) {
-                intsRef.ints[0] = docsOrd;
-                intsRef.length = 1;
-            }
-        } else {
-            int docsOrd = mvOrds[doc];
-            intsRef.offset = 0;
-            if (docsOrd == 0) {
-                intsRef.length = 0;
-            } else if (docsOrd > 0) {
-                intsRef.ints[0] = mvOrds[doc];
-                intsRef.length = 1;
-            } else {
-                assert offsets != null;
-                reader.reset(-1 * (mvOrds[doc] + 1), offsets[doc]);
-                int pos = 0;
-                while (!reader.endOfSlice()) {
-                    if (intsRef.ints.length <= pos) {
-                        intsRef.ints = ArrayUtil.grow(intsRef.ints, pos + 1);
-                    }
-                    intsRef.ints[pos++] = reader.readInt();
-                }
-                intsRef.length = pos;
-            }
-        }
-        return intsRef;
-    }
-
     /**
      * Returns the maximum document ID this builder can associate with an ordinal
      */
@@ -364,7 +463,6 @@ public final class OrdinalsBuilder implements Closeable {
      */
     @Override
     public void close() throws IOException {
-        pool.reset(true, false);
-        offsets = null;
+        ordinals = null;
     }
 }
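Note on the OrdinalsStore above: each document's append position is packed into a single long, with the level encoded in unary in the low bits and the offset in the bits above it. A minimal round-trip sketch of those three helpers, with an illustrative main (the helper shapes are copied from the diff; the demo values are made up):

    class PositionCodecDemo {
        // level >= 1 is encoded as a single set bit at index (level - 1);
        // the offset occupies the bits above it, so both decode unambiguously
        static long position(int level, long offset) {
            return (1L << (level - 1)) | (offset << level);
        }

        static int level(long position) {
            return 1 + Long.numberOfTrailingZeros(position);
        }

        static long offset(long position, int level) {
            return position >>> level;
        }

        public static void main(String[] args) {
            final long pos = position(3, 42);            // encode level 3, offset 42
            System.out.println(level(pos));              // 3
            System.out.println(offset(pos, level(pos))); // 42
        }
    }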
PositiveIntPool.java (deleted)

@@ -1,146 +0,0 @@
-package org.elasticsearch.index.fielddata.ordinals;
-
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.IntsRef;
-import org.elasticsearch.ElasticSearchIllegalArgumentException;
-import org.elasticsearch.common.RamUsage;
-
-/**
- * An efficient store for positive integer slices. This pool uses multiple
- * sliced arrays to hold integers in int array pages rather than an object based
- * datastructures.
- */
-final class PositiveIntPool {
-    // TODO it might be useful to store the size of the slices in a sep
-    // datastructure rather than useing a negative value to donate this.
-    private final int blockShift;
-    private final int blockMask;
-    private final int blockSize;
-    /**
-     * array of buffers currently used in the pool. Buffers are allocated if
-     * needed don't modify this outside of this class
-     */
-    private int[][] buffers = new int[10][];
-
-    /**
-     * index into the buffers array pointing to the current buffer used as the
-     * head
-     */
-    private int bufferUpto = -1;
-    /** Pointer to the current position in head buffer */
-    private int intUpto;
-    /** Current head buffer */
-    private int[] buffer;
-    /** Current head offset */
-    private int intOffset;
-
-
-    /**
-     * Creates a new {@link PositiveIntPool} with the given blockShift.
-     *
-     * @param blockShift
-     *            the n-the power of two indicating the size of each block in
-     *            the paged datastructure. BlockSize = 1 << blockShift
-     */
-    public PositiveIntPool(int blockShift) {
-        this.blockShift = blockShift;
-        this.blockSize = 1 << blockShift;
-        this.blockMask = blockSize - 1;
-        this.intUpto = blockSize;
-        this.intOffset = -blockSize;
-    }
-
-    /**
-     * Adds all integers in the given slices and returns the positive offset
-     * into the datastructure to retrive this slice.
-     */
-    public int put(IntsRef slice) {
-        if ( slice.length > blockSize) {
-            throw new ElasticSearchIllegalArgumentException("Can not store slices greater or equal to: " + blockSize);
-        }
-        if ((intUpto + slice.length) > blockSize) {
-            nextBuffer();
-        }
-        final int relativeOffset = intUpto;
-        System.arraycopy(slice.ints, slice.offset, buffer, relativeOffset, slice.length);
-        intUpto += slice.length;
-        buffer[intUpto - 1] *= -1; // mark as end
-        return relativeOffset + intOffset;
-    }
-
-    /**
-     * Returns the first value of the slice stored at the given offset.
-     * <p>
-     * Note: the slice length must be greater than one otherwise the returned
-     * value is the negative complement of the actual value
-     * </p>
-     */
-    public int getFirstFromOffset(int offset) {
-        final int blockOffset = offset >> blockShift;
-        final int relativeOffset = offset & blockMask;
-        final int[] currentBuffer = buffers[blockOffset];
-        assert currentBuffer[relativeOffset] >= 0;
-        return currentBuffer[relativeOffset];
-    }
-
-    /**
-     * Retrieves a previously stored slice from the pool.
-     *
-     * @param slice the slice to fill
-     * @param offset the offset where the slice is stored
-     */
-    public void fill(IntsRef slice, int offset) {
-        final int blockOffset = offset >> blockShift;
-        final int relativeOffset = offset & blockMask;
-        final int[] currentBuffer = buffers[blockOffset];
-        slice.offset = 0;
-        slice.length = 0;
-        for (int i = relativeOffset; i < currentBuffer.length; i++) {
-            slice.length++;
-            if (currentBuffer[i] < 0) {
-                break;
-            }
-
-        }
-        if (slice.length != 0) {
-            slice.ints = ArrayUtil.grow(slice.ints, slice.length);
-            System.arraycopy(currentBuffer, relativeOffset, slice.ints, 0, slice.length);
-            slice.ints[slice.length-1] *= -1;
-        }
-    }
-
-    public long getMemorySizeInBytes() {
-        return ((bufferUpto + 1) * blockSize * RamUsage.NUM_BYTES_INT) + ((bufferUpto + 1) * RamUsage.NUM_BYTES_ARRAY_HEADER);
-    }
-
-    private void nextBuffer() {
-        if (1 + bufferUpto == buffers.length) {
-            int[][] newBuffers = new int[(int) (buffers.length * 1.5)][];
-            System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
-            buffers = newBuffers;
-        }
-        buffer = buffers[1 + bufferUpto] = new int[blockSize];
-        bufferUpto++;
-        intUpto = 0;
-        intOffset += blockSize;
-    }
-
-}
SinglePackedOrdinals.java

@@ -19,7 +19,7 @@

 package org.elasticsearch.index.fielddata.ordinals;

-import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LongsRef;
 import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.common.RamUsage;

@@ -29,15 +29,19 @@ public class SinglePackedOrdinals implements Ordinals {

     // ordinals with value 0 indicates no value
     private final PackedInts.Reader reader;
-    private final int numOrds;
-    private final int maxOrd;
+    private final long numOrds;
+    private final long maxOrd;

     private long size = -1;

-    public SinglePackedOrdinals(PackedInts.Reader reader, int numOrds) {
+    public SinglePackedOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) {
+        assert builder.getNumMultiValuesDocs() == 0;
+        this.numOrds = builder.getNumOrds();
+        this.maxOrd = builder.getNumOrds() + 1;
+        // We don't reuse the builder as-is because it might have been built with a higher overhead ratio
+        final PackedInts.Mutable reader = PackedInts.getMutable(builder.maxDoc(), PackedInts.bitsRequired(getNumOrds()), acceptableOverheadRatio);
+        PackedInts.copy(builder.getFirstOrdinals(), 0, reader, 0, builder.maxDoc(), 8 * 1024);
         this.reader = reader;
-        this.numOrds = numOrds;
-        this.maxOrd = numOrds + 1;
     }

     @Override
@@ -72,12 +76,12 @@ public class SinglePackedOrdinals implements Ordinals {
     }

     @Override
-    public int getNumOrds() {
+    public long getNumOrds() {
         return numOrds;
     }

     @Override
-    public int getMaxOrd() {
+    public long getMaxOrd() {
         return maxOrd;
     }

@@ -91,7 +95,7 @@ public class SinglePackedOrdinals implements Ordinals {
         private final SinglePackedOrdinals parent;
         private final PackedInts.Reader reader;

-        private final IntsRef intsScratch = new IntsRef(1);
+        private final LongsRef longsScratch = new LongsRef(1);
         private final SingleValueIter iter = new SingleValueIter();

         public Docs(SinglePackedOrdinals parent, PackedInts.Reader reader) {
@@ -110,12 +114,12 @@ public class SinglePackedOrdinals implements Ordinals {
         }

         @Override
-        public int getNumOrds() {
+        public long getNumOrds() {
             return parent.getNumOrds();
         }

         @Override
-        public int getMaxOrd() {
+        public long getMaxOrd() {
             return parent.getMaxOrd();
         }

@@ -125,21 +129,21 @@ public class SinglePackedOrdinals implements Ordinals {
         }

         @Override
-        public int getOrd(int docId) {
-            return (int) reader.get(docId);
+        public long getOrd(int docId) {
+            return reader.get(docId);
         }

         @Override
-        public IntsRef getOrds(int docId) {
-            final int ordinal = (int) reader.get(docId);
+        public LongsRef getOrds(int docId) {
+            final long ordinal = reader.get(docId);
             if (ordinal == 0) {
-                intsScratch.length = 0;
+                longsScratch.length = 0;
             } else {
-                intsScratch.offset = 0;
-                intsScratch.length = 1;
-                intsScratch.ints[0] = ordinal;
+                longsScratch.offset = 0;
+                longsScratch.length = 1;
+                longsScratch.longs[0] = ordinal;
             }
-            return intsScratch;
+            return longsScratch;
         }

         @Override
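The new SinglePackedOrdinals constructor above re-packs the builder's first-ordinals array rather than reusing it, because the builder is typically created with a faster (larger) acceptable overhead ratio than what should stay resident in memory. A hedged sketch of that re-packing idiom with Lucene's PackedInts (the wrapper class and method name are illustrative, not part of the commit):

    import org.apache.lucene.util.packed.PackedInts;

    class Repack {
        // Sketch: copy packed ints into a structure sized with a tighter
        // overhead ratio, mirroring the constructor above.
        static PackedInts.Reader repack(PackedInts.Reader src, long maxValue) {
            final PackedInts.Mutable dst = PackedInts.getMutable(
                    src.size(), PackedInts.bitsRequired(maxValue), PackedInts.COMPACT);
            PackedInts.copy(src, 0, dst, 0, src.size(), 8 * 1024); // 8K copy buffer
            return dst;
        }
    }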
@@ -1,216 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.fielddata.ordinals;
-
-import org.apache.lucene.util.IntsRef;
-import org.elasticsearch.common.RamUsage;
-
-/**
- * Ordinals implementation that stores the ordinals into sparse fixed arrays.
- * <p/>
- * This prevents large ordinal arrays that are created in for example {@link MultiFlatArrayOrdinals} when
- * only a few documents have a lot of terms per field.
- */
-public final class SparseMultiArrayOrdinals implements Ordinals {
-
-    private final int[] lookup;
-    private final PositiveIntPool pool;
-    private final int numOrds;
-    private final int maxOrd;
-    private final int numDocs;
-    private long size = -1;
-
-    public SparseMultiArrayOrdinals(OrdinalsBuilder builder, int maxSize) {
-        int blockShift = Math.min(floorPow2(builder.getTotalNumOrds() << 1), floorPow2(maxSize));
-        this.pool = new PositiveIntPool(Math.max(4, blockShift));
-        this.numDocs = builder.maxDoc();
-
-        this.lookup = new int[numDocs];
-        this.numOrds = builder.getNumOrds();
-        this.maxOrd = numOrds + 1;
-        IntsRef spare;
-        for (int doc = 0; doc < numDocs; doc++) {
-            spare = builder.docOrds(doc);
-            final int size = spare.length;
-            if (size == 0) {
-                lookup[doc] = 0;
-            } else if (size == 1) {
-                lookup[doc] = spare.ints[spare.offset];
-            } else {
-                int offset = pool.put(spare);
-                lookup[doc] = -(offset) - 1;
-            }
-        }
-    }
-
-    private static int floorPow2(int number) {
-        return 31 - Integer.numberOfLeadingZeros(number);
-    }
-
-    @Override
-    public boolean hasSingleArrayBackingStorage() {
-        return false;
-    }
-
-    @Override
-    public Object getBackingStorage() {
-        return null;
-    }
-
-    @Override
-    public long getMemorySizeInBytes() {
-        if (size == -1) {
-            size = (RamUsage.NUM_BYTES_ARRAY_HEADER + (RamUsage.NUM_BYTES_INT * lookup.length)) + pool.getMemorySizeInBytes();
-        }
-        return size;
-    }
-
-    @Override
-    public boolean isMultiValued() {
-        return true;
-    }
-
-    @Override
-    public int getNumDocs() {
-        return numDocs;
-    }
-
-    @Override
-    public int getNumOrds() {
-        return numOrds;
-    }
-
-    @Override
-    public int getMaxOrd() {
-        return maxOrd;
-    }
-
-    @Override
-    public Docs ordinals() {
-        return new Docs(this, lookup, pool);
-    }
-
-    static class Docs implements Ordinals.Docs {
-
-        private final SparseMultiArrayOrdinals parent;
-        private final int[] lookup;
-
-        private final IterImpl iter;
-        private final PositiveIntPool pool;
-        private final IntsRef spare = new IntsRef(1);
-
-        public Docs(SparseMultiArrayOrdinals parent, int[] lookup, PositiveIntPool pool) {
-            this.parent = parent;
-            this.lookup = lookup;
-            this.pool = pool;
-            this.iter = new IterImpl(lookup, pool);
-        }
-
-        @Override
-        public Ordinals ordinals() {
-            return this.parent;
-        }
-
-        @Override
-        public int getNumDocs() {
-            return parent.getNumDocs();
-        }
-
-        @Override
-        public int getNumOrds() {
-            return parent.getNumOrds();
-        }
-
-        @Override
-        public int getMaxOrd() {
-            return parent.getMaxOrd();
-        }
-
-        @Override
-        public boolean isMultiValued() {
-            return true;
-        }
-
-        @Override
-        public int getOrd(int docId) {
-            int pointer = lookup[docId];
-            if (pointer < 0) {
-                return pool.getFirstFromOffset(-(pointer + 1));
-            }
-            return pointer;
-        }
-
-        @Override
-        public IntsRef getOrds(int docId) {
-            spare.offset = 0;
-            int pointer = lookup[docId];
-            if (pointer == 0) {
-                spare.length = 0;
-            } else if (pointer > 0) {
-                spare.length = 1;
-                spare.ints[0] = pointer;
-                return spare;
-            } else {
-                pool.fill(spare, -(pointer + 1));
-                return spare;
-            }
-            return spare;
-        }
-
-        @Override
-        public Iter getIter(int docId) {
-            return iter.reset(docId);
-        }
-
-        class IterImpl implements Docs.Iter {
-            private final int[] lookup;
-            private final PositiveIntPool pool;
-            private final IntsRef slice = new IntsRef(1);
-            private int valuesOffset;
-
-            public IterImpl(int[] lookup, PositiveIntPool pool) {
-                this.lookup = lookup;
-                this.pool = pool;
-            }
-
-            public IterImpl reset(int docId) {
-                final int pointer = lookup[docId];
-                if (pointer < 0) {
-                    pool.fill(slice, -(pointer + 1));
-                } else {
-                    slice.ints[0] = pointer;
-                    slice.offset = 0;
-                    slice.length = 1;
-                }
-                valuesOffset = 0;
-                return this;
-            }
-
-            @Override
-            public int next() {
-                if (valuesOffset >= slice.length) {
-                    return 0;
-                }
-                return slice.ints[slice.offset + (valuesOffset++)];
-            }
-        }
-    }
-}
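The deleted SparseMultiArrayOrdinals encoded each document's entry in a single int: a non-negative value is the document's only ordinal stored inline, while a multi-valued document stores a negative pointer into a shared pool, shifted by one so that pool offset 0 stays distinguishable from ordinal 0 ("no value"). A small self-contained sketch of that encoding (the demo class is illustrative):

    // Illustrative only: the -(offset) - 1 pointer trick used by the removed class.
    public class SparsePointerDemo {
        static int encodePoolOffset(int offset) {
            return -(offset) - 1;    // always negative, even for offset 0
        }

        static int decodePoolOffset(int pointer) {
            return -(pointer + 1);   // inverse mapping back to the pool offset
        }

        public static void main(String[] args) {
            int pointer = encodePoolOffset(0);
            System.out.println(pointer);                   // -1
            System.out.println(decodePoolOffset(pointer)); // 0
        }
    }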
@@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;

 import org.apache.lucene.util.FixedBitSet;
 import org.elasticsearch.common.RamUsage;
+import org.elasticsearch.common.util.BigDoubleArrayList;
 import org.elasticsearch.index.fielddata.*;
 import org.elasticsearch.index.fielddata.ordinals.Ordinals;

@@ -30,14 +31,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData

     public static final DoubleArrayAtomicFieldData EMPTY = new Empty();

-    protected final double[] values;
     private final int numDocs;

     protected long size = -1;

-    public DoubleArrayAtomicFieldData(double[] values, int numDocs) {
+    public DoubleArrayAtomicFieldData(int numDocs) {
         super(true);
-        this.values = values;
         this.numDocs = numDocs;
     }

@@ -53,7 +52,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
     static class Empty extends DoubleArrayAtomicFieldData {

         Empty() {
-            super(null, 0);
+            super(0);
         }

         @Override
@@ -94,10 +93,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData

     public static class WithOrdinals extends DoubleArrayAtomicFieldData {

+        private final BigDoubleArrayList values;
         private final Ordinals ordinals;

-        public WithOrdinals(double[] values, int numDocs, Ordinals ordinals) {
-            super(values, numDocs);
+        public WithOrdinals(BigDoubleArrayList values, int numDocs, Ordinals ordinals) {
+            super(numDocs);
+            this.values = values;
             this.ordinals = ordinals;
         }

@@ -114,7 +115,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
         @Override
         public long getMemorySizeInBytes() {
             if (size == -1) {
-                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + +RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE) + ordinals.getMemorySizeInBytes();
+                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes();
             }
             return size;
         }
@@ -133,31 +134,31 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData

         static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {

-            private final double[] values;
+            private final BigDoubleArrayList values;

-            LongValues(double[] values, Ordinals.Docs ordinals) {
+            LongValues(BigDoubleArrayList values, Ordinals.Docs ordinals) {
                 super(ordinals);
                 this.values = values;
             }

             @Override
-            public final long getValueByOrd(int ord) {
-                return (long) values[ord];
+            public final long getValueByOrd(long ord) {
+                return (long) values.get(ord);
             }
         }

         static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {

-            private final double[] values;
+            private final BigDoubleArrayList values;

-            DoubleValues(double[] values, Ordinals.Docs ordinals) {
+            DoubleValues(BigDoubleArrayList values, Ordinals.Docs ordinals) {
                 super(ordinals);
                 this.values = values;
             }

             @Override
-            public double getValueByOrd(int ord) {
-                return values[ord];
+            public double getValueByOrd(long ord) {
+                return values.get(ord);
             }
         }
     }
@@ -168,10 +169,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
     */
    public static class SingleFixedSet extends DoubleArrayAtomicFieldData {

+        private final BigDoubleArrayList values;
         private final FixedBitSet set;

-        public SingleFixedSet(double[] values, int numDocs, FixedBitSet set) {
-            super(values, numDocs);
+        public SingleFixedSet(BigDoubleArrayList values, int numDocs, FixedBitSet set) {
+            super(numDocs);
+            this.values = values;
             this.set = set;
         }

@@ -188,7 +191,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
         @Override
         public long getMemorySizeInBytes() {
             if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
+                size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
             }
             return size;
         }
@@ -205,10 +208,10 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData

         static class LongValues extends org.elasticsearch.index.fielddata.LongValues {

-            private final double[] values;
+            private final BigDoubleArrayList values;
             private final FixedBitSet set;

-            LongValues(double[] values, FixedBitSet set) {
+            LongValues(BigDoubleArrayList values, FixedBitSet set) {
                 super(false);
                 this.values = values;
                 this.set = set;
@@ -221,16 +224,16 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData

             @Override
             public long getValue(int docId) {
-                return (long) values[docId];
+                return (long) values.get(docId);
             }
         }

         static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {

-            private final double[] values;
+            private final BigDoubleArrayList values;
             private final FixedBitSet set;

-            DoubleValues(double[] values, FixedBitSet set) {
+            DoubleValues(BigDoubleArrayList values, FixedBitSet set) {
                 super(false);
                 this.values = values;
                 this.set = set;
@@ -243,7 +246,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData

             @Override
             public double getValue(int docId) {
-                return values[docId];
+                return values.get(docId);
             }

         }
@@ -254,12 +257,15 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
     */
    public static class Single extends DoubleArrayAtomicFieldData {

+        private final BigDoubleArrayList values;
+
         /**
          * Note, here, we assume that there is no offset by 1 from docId, so position 0
          * is the value for docId 0.
          */
-        public Single(double[] values, int numDocs) {
-            super(values, numDocs);
+        public Single(BigDoubleArrayList values, int numDocs) {
+            super(numDocs);
+            this.values = values;
         }

         @Override
@@ -275,7 +281,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
         @Override
         public long getMemorySizeInBytes() {
             if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE);
+                size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes();
             }
             return size;
         }
@@ -292,32 +298,32 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData

         static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {

-            private final double[] values;
+            private final BigDoubleArrayList values;

-            LongValues(double[] values) {
+            LongValues(BigDoubleArrayList values) {
                 super(false);
                 this.values = values;
             }

             @Override
             public long getValue(int docId) {
-                return (long) values[docId];
+                return (long) values.get(docId);
             }

         }

         static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {

-            private final double[] values;
+            private final BigDoubleArrayList values;

-            DoubleValues(double[] values) {
+            DoubleValues(BigDoubleArrayList values) {
                 super(false);
                 this.values = values;
             }

             @Override
             public double getValue(int docId) {
-                return values[docId];
+                return values.get(docId);
             }

         }
     }
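Throughout this file, flat double[] arrays give way to BigDoubleArrayList, which pages its storage so a list can grow past 2^31 entries and never needs one huge copy on resize. A minimal sketch of the paging idea (the class below is hypothetical, not Elasticsearch's implementation):

    import java.util.ArrayList;
    import java.util.List;

    // Illustrative only: a paged double list in the spirit of BigDoubleArrayList.
    class PagedDoubleList {
        private static final int PAGE_SHIFT = 14;             // 16K doubles per page
        private static final int PAGE_MASK = (1 << PAGE_SHIFT) - 1;
        private final List<double[]> pages = new ArrayList<double[]>();
        private long size = 0;

        void add(double value) {
            int page = (int) (size >>> PAGE_SHIFT);
            if (page == pages.size()) {
                pages.add(new double[1 << PAGE_SHIFT]);       // grow one page at a time
            }
            pages.get(page)[(int) (size & PAGE_MASK)] = value;
            size++;
        }

        double get(long index) {                              // long index: > 2B entries fit
            return pages.get((int) (index >>> PAGE_SHIFT))[(int) (index & PAGE_MASK)];
        }

        long size() {
            return size;
        }
    }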
@@ -19,7 +19,6 @@

 package org.elasticsearch.index.fielddata.plain;

-import gnu.trove.list.array.TDoubleArrayList;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.Terms;
@@ -27,11 +26,11 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.RamUsage;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.BigDoubleArrayList;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.fielddata.*;
 import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
@@ -49,7 +48,7 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
     public static class Builder implements IndexFieldData.Builder {

         @Override
-        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
+        public IndexFieldData<?> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
             return new DoubleArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
         }
     }
@@ -92,11 +91,11 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
             return DoubleArrayAtomicFieldData.EMPTY;
         }
         // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
-        final TDoubleArrayList values = new TDoubleArrayList();
+        final BigDoubleArrayList values = new BigDoubleArrayList();

         values.add(0); // first "t" indicates null value
-        final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
+        final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
+        OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio);
         try {
             final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
             BytesRef term;
@@ -113,23 +112,23 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
             long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_DOUBLE;
             long ordinalsSize = build.getMemorySizeInBytes();
             if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
-                return new DoubleArrayAtomicFieldData.WithOrdinals(values.toArray(new double[values.size()]), reader.maxDoc(), build);
+                return new DoubleArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
             }

-            double[] sValues = new double[reader.maxDoc()];
             int maxDoc = reader.maxDoc();
+            BigDoubleArrayList sValues = new BigDoubleArrayList(maxDoc);
             for (int i = 0; i < maxDoc; i++) {
-                sValues[i] = values.get(ordinals.getOrd(i));
+                sValues.add(values.get(ordinals.getOrd(i)));
             }
+            assert sValues.size() == maxDoc;
             if (set == null) {
-                return new DoubleArrayAtomicFieldData.Single(sValues, reader.maxDoc());
+                return new DoubleArrayAtomicFieldData.Single(sValues, maxDoc);
             } else {
-                return new DoubleArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
+                return new DoubleArrayAtomicFieldData.SingleFixedSet(sValues, maxDoc, set);
             }
         } else {
             return new DoubleArrayAtomicFieldData.WithOrdinals(
-                    values.toArray(new double[values.size()]),
+                    values,
                     reader.maxDoc(),
                     build);
         }
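The loader above keeps the ordinals-backed layout only when the unique-values list plus the ordinals map is estimated to cost less than one flat value per document; otherwise it materializes a dense per-document array. A worked version of that comparison with made-up numbers (all figures illustrative):

    // Illustrative only: the size comparison the loader performs, with sample numbers.
    public class LayoutChoiceDemo {
        public static void main(String[] args) {
            long maxDoc = 1_000_000L;
            long numUniqueValues = 1_000L;             // few distinct values
            long singleValuesArraySize = maxDoc * 8L;  // one double per document
            long uniqueValuesArraySize = numUniqueValues * 8L;
            long ordinalsSize = 2_500_000L;            // e.g. ~20 packed bits per document
            boolean keepOrdinals =
                    uniqueValuesArraySize + ordinalsSize < singleValuesArraySize;
            System.out.println(keepOrdinals);          // true: ordinals win here
        }
    }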
@@ -21,12 +21,10 @@ package org.elasticsearch.index.fielddata.plain;

 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.fst.BytesRefFSTEnum;
-import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
-import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.*;
 import org.apache.lucene.util.fst.FST.Arc;
 import org.apache.lucene.util.fst.FST.BytesReader;
-import org.apache.lucene.util.fst.Util;
+import org.elasticsearch.common.util.BigIntArray;
 import org.elasticsearch.index.fielddata.AtomicFieldData;
 import org.elasticsearch.index.fielddata.ScriptDocValues;
 import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
@@ -46,7 +44,7 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
     // 0 ordinal in values means no value (its null)
     protected final Ordinals ordinals;

-    private volatile int[] hashes;
+    private volatile BigIntArray hashes;
     private long size = -1;

     private final FST<Long> fst;
@@ -104,18 +102,17 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
         assert fst != null;
         if (hashes == null) {
             BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
-            int[] hashes = new int[ordinals.getMaxOrd()];
-            InputOutput<Long> next;
+            BigIntArray hashes = new BigIntArray(ordinals.getMaxOrd());
             // we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support
             // empty strings twice. ie. them merge fails for long output.
-            hashes[0] = new BytesRef().hashCode();
-            int i = 1;
+            hashes.set(0, new BytesRef().hashCode());
             try {
-                while ((next = fstEnum.next()) != null) {
-                    hashes[i++] = next.input.hashCode();
+                for (long i = 1, maxOrd = ordinals.getMaxOrd(); i < maxOrd; ++i) {
+                    hashes.set(i, fstEnum.next().input.hashCode());
                 }
-            } catch (IOException ex) {
-                //bogus
+                assert fstEnum.next() == null;
+            } catch (IOException e) {
+                throw new AssertionError("Cannot happen", e);
             }
             this.hashes = hashes;
         }
@@ -141,7 +138,7 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
         }

         @Override
-        public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
+        public BytesRef getValueScratchByOrd(long ord, BytesRef ret) {
             if (ord == 0) {
                 ret.length = 0;
                 return ret;
@@ -170,16 +167,16 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr

         @Override
         public Iter getIter(int docId) {
-            int ord = ordinals.getOrd(docId);
+            long ord = ordinals.getOrd(docId);
             if (ord == 0) return Iter.Empty.INSTANCE;
             return iter.reset(getValueByOrd(ord), ord);
         }
     }

     static final class SingleHashed extends Single {
-        private final int[] hashes;
+        private final BigIntArray hashes;

-        SingleHashed(FST<Long> fst, Docs ordinals, int[] hashes) {
+        SingleHashed(FST<Long> fst, Docs ordinals, BigIntArray hashes) {
             super(fst, ordinals);
             this.hashes = hashes;
         }
@@ -188,16 +185,16 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
         protected Iter.Single newSingleIter() {
             return new Iter.Single() {
                 public int hash() {
-                    return hashes[ord];
+                    return hashes.get(ord);
                 }
             };
         }

         @Override
         public int getValueHashed(int docId, BytesRef ret) {
-            final int ord = ordinals.getOrd(docId);
+            final long ord = ordinals.getOrd(docId);
             getValueScratchByOrd(ord, ret);
-            return hashes[ord];
+            return hashes.get(ord);
         }
     }

@@ -219,9 +216,9 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr


     static final class MultiHashed extends Multi {
-        private final int[] hashes;
+        private final BigIntArray hashes;

-        MultiHashed(FST<Long> fst, Docs ordinals, int[] hashes) {
+        MultiHashed(FST<Long> fst, Docs ordinals, BigIntArray hashes) {
             super(fst, ordinals);
             this.hashes = hashes;
         }
@@ -230,16 +227,16 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
         protected Iter.Multi newMultiIter() {
             return new Iter.Multi(this) {
                 public int hash() {
-                    return hashes[ord];
+                    return hashes.get(ord);
                 }
             };
         }

         @Override
         public int getValueHashed(int docId, BytesRef ret) {
-            final int ord = ordinals.getOrd(docId);
+            final long ord = ordinals.getOrd(docId);
             getValueScratchByOrd(ord, ret);
-            return hashes[ord];
+            return hashes.get(ord);
         }

     }
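The rewritten hash loop above leans on one invariant: the FST enumerates terms in the same sorted order in which the builder assigned ordinals, so the i-th term visited is exactly ordinal i. A sketch of that per-ordinal precomputation, with a plain array standing in for the FST enum and BigIntArray (demo code, not the commit's):

    import org.apache.lucene.util.BytesRef;

    // Illustrative only: one precomputed hash per ordinal, filled in ord order.
    public class OrdHashDemo {
        static int[] precomputeHashes(BytesRef[] termsInOrder) {
            int[] hashes = new int[termsInOrder.length + 1];
            hashes[0] = new BytesRef().hashCode();   // ord 0 is the "missing" value
            for (int ord = 1; ord <= termsInOrder.length; ord++) {
                hashes[ord] = termsInOrder[ord - 1].hashCode();
            }
            return hashes;
        }
    }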
@@ -26,7 +26,6 @@ import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.INPUT_TYPE;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.fielddata.FieldDataType;
@@ -65,9 +64,14 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
         org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>(INPUT_TYPE.BYTE1, outputs);
         final IntsRef scratch = new IntsRef();

-        boolean preDefineBitsRequired = regex == null && frequency == null;
-        final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio);
+        final long numTerms;
+        if (regex == null && frequency == null) {
+            numTerms = terms.size();
+        } else {
+            numTerms = -1;
+        }
+        final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
+        OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio);
         try {

             // we don't store an ord 0 in the FST since we could have an empty string in there and FST don't support
@@ -75,7 +79,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
             TermsEnum termsEnum = filter(terms, reader);
             DocsEnum docsEnum = null;
             for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
-                final int termOrd = builder.nextOrdinal();
+                final long termOrd = builder.nextOrdinal();
                 assert termOrd > 0;
                 fstBuilder.add(Util.toIntsRef(term, scratch), (long)termOrd);
                 docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
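Passing terms.size() through when no filter is applied lets OrdinalsBuilder size its packed structures up front rather than growing them; -1 signals an unknown count. The bit-width arithmetic behind such pre-sizing, using Lucene's real PackedInts helper (the surrounding demo class is illustrative):

    import org.apache.lucene.util.packed.PackedInts;

    // Illustrative only: how a known term count translates into a fixed slot width.
    public class PreSizeDemo {
        public static void main(String[] args) {
            long numTerms = 3_000_000_000L;      // more than Integer.MAX_VALUE
            int bitsPerOrd = PackedInts.bitsRequired(numTerms);
            System.out.println(bitsPerOrd + " bits per ordinal slot");  // 32
        }
    }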
@@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;

 import org.apache.lucene.util.FixedBitSet;
 import org.elasticsearch.common.RamUsage;
+import org.elasticsearch.common.util.BigFloatArrayList;
 import org.elasticsearch.index.fielddata.*;
 import org.elasticsearch.index.fielddata.ordinals.Ordinals;

@@ -30,14 +31,12 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {

     public static final FloatArrayAtomicFieldData EMPTY = new Empty();

-    protected final float[] values;
     private final int numDocs;

     protected long size = -1;

-    public FloatArrayAtomicFieldData(float[] values, int numDocs) {
+    public FloatArrayAtomicFieldData(int numDocs) {
         super(true);
-        this.values = values;
         this.numDocs = numDocs;
     }

@@ -53,7 +52,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
     static class Empty extends FloatArrayAtomicFieldData {

         Empty() {
-            super(null, 0);
+            super(0);
         }

         @Override
@@ -95,9 +94,11 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
     public static class WithOrdinals extends FloatArrayAtomicFieldData {

         private final Ordinals ordinals;
+        private final BigFloatArrayList values;

-        public WithOrdinals(float[] values, int numDocs, Ordinals ordinals) {
-            super(values, numDocs);
+        public WithOrdinals(BigFloatArrayList values, int numDocs, Ordinals ordinals) {
+            super(numDocs);
+            this.values = values;
             this.ordinals = ordinals;
         }

@@ -114,7 +115,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
         @Override
         public long getMemorySizeInBytes() {
             if (size == -1) {
-                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT) + ordinals.getMemorySizeInBytes();
+                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes();
             }
             return size;
         }
@@ -131,31 +132,31 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {

         static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {

-            private final float[] values;
+            private final BigFloatArrayList values;

-            LongValues(float[] values, Ordinals.Docs ordinals) {
+            LongValues(BigFloatArrayList values, Ordinals.Docs ordinals) {
                 super(ordinals);
                 this.values = values;
             }

             @Override
-            public long getValueByOrd(int ord) {
-                return (long) values[ord];
+            public long getValueByOrd(long ord) {
+                return (long) values.get(ord);
             }
         }

         static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {

-            private final float[] values;
+            private final BigFloatArrayList values;

-            DoubleValues(float[] values, Ordinals.Docs ordinals) {
+            DoubleValues(BigFloatArrayList values, Ordinals.Docs ordinals) {
                 super(ordinals);
                 this.values = values;
             }

             @Override
-            public double getValueByOrd(int ord) {
-                return values[ord];
+            public double getValueByOrd(long ord) {
+                return values.get(ord);
             }
         }
     }
@@ -166,10 +167,12 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
     */
    public static class SingleFixedSet extends FloatArrayAtomicFieldData {

+        private final BigFloatArrayList values;
         private final FixedBitSet set;

-        public SingleFixedSet(float[] values, int numDocs, FixedBitSet set) {
-            super(values, numDocs);
+        public SingleFixedSet(BigFloatArrayList values, int numDocs, FixedBitSet set) {
+            super(numDocs);
+            this.values = values;
             this.set = set;
         }

@@ -186,7 +189,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
         @Override
         public long getMemorySizeInBytes() {
             if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
+                size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
             }
             return size;
         }
@@ -204,10 +207,10 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {

         static class LongValues extends org.elasticsearch.index.fielddata.LongValues {

-            private final float[] values;
+            private final BigFloatArrayList values;
             private final FixedBitSet set;

-            LongValues(float[] values, FixedBitSet set) {
+            LongValues(BigFloatArrayList values, FixedBitSet set) {
                 super(false);
                 this.values = values;
                 this.set = set;
@@ -220,16 +223,16 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {

             @Override
             public long getValue(int docId) {
-                return (long) values[docId];
+                return (long) values.get(docId);
             }
         }

         static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {

-            private final float[] values;
+            private final BigFloatArrayList values;
             private final FixedBitSet set;

-            DoubleValues(float[] values, FixedBitSet set) {
+            DoubleValues(BigFloatArrayList values, FixedBitSet set) {
                 super(false);
                 this.values = values;
                 this.set = set;
@@ -242,7 +245,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {

             @Override
             public double getValue(int docId) {
-                return (double) values[docId];
+                return (double) values.get(docId);
             }

         }
@@ -254,12 +257,15 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
     */
    public static class Single extends FloatArrayAtomicFieldData {

+        private final BigFloatArrayList values;
+
        /**
         * Note, here, we assume that there is no offset by 1 from docId, so position 0
         * is the value for docId 0.
         */
-        public Single(float[] values, int numDocs) {
-            super(values, numDocs);
+        public Single(BigFloatArrayList values, int numDocs) {
+            super(numDocs);
+            this.values = values;
         }

         @Override
@@ -275,7 +281,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
         @Override
         public long getMemorySizeInBytes() {
             if (size == -1) {
-                size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT);
+                size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes();
             }
             return size;
         }
@@ -293,32 +299,32 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {

         static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {

-            private final float[] values;
+            private final BigFloatArrayList values;

-            LongValues(float[] values) {
+            LongValues(BigFloatArrayList values) {
                 super(false);
                 this.values = values;
             }

             @Override
             public long getValue(int docId) {
-                return (long) values[docId];
+                return (long) values.get(docId);
             }

         }

         static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {

-            private final float[] values;
+            private final BigFloatArrayList values;

-            DoubleValues(float[] values) {
+            DoubleValues(BigFloatArrayList values) {
                 super(false);
                 this.values = values;
             }

             @Override
             public double getValue(int docId) {
-                return (double) values[docId];
+                return (double) values.get(docId);
             }
         }
     }
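As in the double variant, each float store backs both a LongValues and a DoubleValues view, and the two casts in those views behave differently: widening float to double preserves the stored value, while float to long truncates toward zero, which is why getValue on the long view can drop the fraction. A two-line illustration (demo class only):

    // Illustrative only: the two conversions used by the Long/Double views above.
    public class FloatViewDemo {
        public static void main(String[] args) {
            float stored = 3.5f;
            System.out.println((long) stored);    // 3   -- truncates toward zero
            System.out.println((double) stored);  // 3.5 -- widening keeps the value
        }
    }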
@@ -19,7 +19,6 @@

 package org.elasticsearch.index.fielddata.plain;

-import gnu.trove.list.array.TFloatArrayList;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.Terms;
@@ -27,11 +26,11 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.RamUsage;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.BigFloatArrayList;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.fielddata.*;
 import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
@@ -49,7 +48,7 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
     public static class Builder implements IndexFieldData.Builder {

         @Override
-        public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
+        public IndexFieldData<?> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
             return new FloatArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
         }
     }
@@ -91,12 +90,12 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
             return FloatArrayAtomicFieldData.EMPTY;
         }
         // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
-        final TFloatArrayList values = new TFloatArrayList();
+        final BigFloatArrayList values = new BigFloatArrayList();

         values.add(0); // first "t" indicates null value

-        final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
-        OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
+        final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
+        OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio);
         try {
             BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
             BytesRef term;
@@ -110,25 +109,26 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA

             // there's sweatspot where due to low unique value count, using ordinals will consume less memory
             long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_FLOAT + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
-            long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_FLOAT;
+            long uniqueValuesArraySize = values.sizeInBytes();
             long ordinalsSize = build.getMemorySizeInBytes();
             if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
-                return new FloatArrayAtomicFieldData.WithOrdinals(values.toArray(new float[values.size()]), reader.maxDoc(), build);
+                return new FloatArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
             }

-            float[] sValues = new float[reader.maxDoc()];
             int maxDoc = reader.maxDoc();
+            BigFloatArrayList sValues = new BigFloatArrayList(maxDoc);
             for (int i = 0; i < maxDoc; i++) {
-                sValues[i] = values.get(ordinals.getOrd(i));
+                sValues.add(values.get(ordinals.getOrd(i)));
             }
+            assert sValues.size() == maxDoc;
             if (set == null) {
-                return new FloatArrayAtomicFieldData.Single(sValues, reader.maxDoc());
+                return new FloatArrayAtomicFieldData.Single(sValues, maxDoc);
             } else {
-                return new FloatArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
+                return new FloatArrayAtomicFieldData.SingleFixedSet(sValues, maxDoc, set);
             }
         } else {
             return new FloatArrayAtomicFieldData.WithOrdinals(
-                    values.toArray(new float[values.size()]),
+                    values,
                     reader.maxDoc(),
                     build);
         }
@ -22,6 +22,7 @@ package org.elasticsearch.index.fielddata.plain;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.elasticsearch.common.RamUsage;
|
import org.elasticsearch.common.RamUsage;
|
||||||
import org.elasticsearch.common.geo.GeoPoint;
|
import org.elasticsearch.common.geo.GeoPoint;
|
||||||
|
import org.elasticsearch.common.util.BigDoubleArrayList;
|
||||||
import org.elasticsearch.index.fielddata.AtomicGeoPointFieldData;
|
import org.elasticsearch.index.fielddata.AtomicGeoPointFieldData;
|
||||||
import org.elasticsearch.index.fielddata.BytesValues;
|
import org.elasticsearch.index.fielddata.BytesValues;
|
||||||
import org.elasticsearch.index.fielddata.GeoPointValues;
|
import org.elasticsearch.index.fielddata.GeoPointValues;
|
||||||
|
@ -34,15 +35,11 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
|
||||||
|
|
||||||
public static final GeoPointDoubleArrayAtomicFieldData EMPTY = new Empty();
|
public static final GeoPointDoubleArrayAtomicFieldData EMPTY = new Empty();
|
||||||
|
|
||||||
protected final double[] lon;
|
|
||||||
protected final double[] lat;
|
|
||||||
private final int numDocs;
|
private final int numDocs;
|
||||||
|
|
||||||
protected long size = -1;
|
protected long size = -1;
|
||||||
|
|
||||||
public GeoPointDoubleArrayAtomicFieldData(double[] lon, double[] lat, int numDocs) {
|
public GeoPointDoubleArrayAtomicFieldData(int numDocs) {
|
||||||
this.lon = lon;
|
|
||||||
this.lat = lat;
|
|
||||||
this.numDocs = numDocs;
|
this.numDocs = numDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,7 +60,7 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
|
||||||
static class Empty extends GeoPointDoubleArrayAtomicFieldData {
|
static class Empty extends GeoPointDoubleArrayAtomicFieldData {
|
||||||
|
|
||||||
Empty() {
|
Empty() {
|
||||||
super(null, null, 0);
|
super(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -99,10 +96,13 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
|
||||||
|
|
||||||
public static class WithOrdinals extends GeoPointDoubleArrayAtomicFieldData {
|
public static class WithOrdinals extends GeoPointDoubleArrayAtomicFieldData {
|
||||||
|
|
||||||
|
private final BigDoubleArrayList lon, lat;
|
||||||
private final Ordinals ordinals;
|
private final Ordinals ordinals;
|
||||||
|
|
||||||
public WithOrdinals(double[] lon, double[] lat, int numDocs, Ordinals ordinals) {
|
public WithOrdinals(BigDoubleArrayList lon, BigDoubleArrayList lat, int numDocs, Ordinals ordinals) {
|
||||||
super(lon, lat, numDocs);
|
super(numDocs);
|
||||||
|
this.lon = lon;
|
||||||
|
this.lat = lat;
|
||||||
             this.ordinals = ordinals;
         }

@@ -119,7 +119,7 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
         @Override
         public long getMemorySizeInBytes() {
             if (size == -1) {
-                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + (RamUsage.NUM_BYTES_ARRAY_HEADER + (lon.length * RamUsage.NUM_BYTES_DOUBLE)) + (RamUsage.NUM_BYTES_ARRAY_HEADER + (lat.length * RamUsage.NUM_BYTES_DOUBLE)) + ordinals.getMemorySizeInBytes();
+                size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + lon.sizeInBytes() + lat.sizeInBytes();
             }
             return size;
         }

@@ -131,15 +131,14 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF

     static class GeoPointValues implements org.elasticsearch.index.fielddata.GeoPointValues {

-        private final double[] lon;
-        private final double[] lat;
+        private final BigDoubleArrayList lon, lat;
         private final Ordinals.Docs ordinals;

         private final GeoPoint scratch = new GeoPoint();
         private final ValuesIter valuesIter;
         private final SafeValuesIter safeValuesIter;

-        GeoPointValues(double[] lon, double[] lat, Ordinals.Docs ordinals) {
+        GeoPointValues(BigDoubleArrayList lon, BigDoubleArrayList lat, Ordinals.Docs ordinals) {
             this.lon = lon;
             this.lat = lat;
             this.ordinals = ordinals;
@@ -159,20 +158,20 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF

         @Override
         public GeoPoint getValue(int docId) {
-            int ord = ordinals.getOrd(docId);
-            if (ord == 0) {
+            long ord = ordinals.getOrd(docId);
+            if (ord == 0L) {
                 return null;
             }
-            return scratch.reset(lat[ord], lon[ord]);
+            return scratch.reset(lat.get(ord), lon.get(ord));
         }

         @Override
         public GeoPoint getValueSafe(int docId) {
-            int ord = ordinals.getOrd(docId);
-            if (ord == 0) {
+            long ord = ordinals.getOrd(docId);
+            if (ord == 0L) {
                 return null;
             }
-            return new GeoPoint(lat[ord], lon[ord]);
+            return new GeoPoint(lat.get(ord), lon.get(ord));
         }

         @Override
@@ -188,14 +187,13 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF

     static class ValuesIter implements Iter {

-        private final double[] lon;
-        private final double[] lat;
+        private final BigDoubleArrayList lon, lat;
         private final GeoPoint scratch = new GeoPoint();

         private Ordinals.Docs.Iter ordsIter;
-        private int ord;
+        private long ord;

-        ValuesIter(double[] lon, double[] lat) {
+        ValuesIter(BigDoubleArrayList lon, BigDoubleArrayList lat) {
             this.lon = lon;
             this.lat = lat;
         }
@@ -213,7 +211,7 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF

         @Override
         public GeoPoint next() {
-            scratch.reset(lat[ord], lon[ord]);
+            scratch.reset(lat.get(ord), lon.get(ord));
             ord = ordsIter.next();
             return scratch;
         }
@@ -221,13 +219,12 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF

     static class SafeValuesIter implements Iter {

-        private final double[] lon;
-        private final double[] lat;
+        private final BigDoubleArrayList lon, lat;

         private Ordinals.Docs.Iter ordsIter;
-        private int ord;
+        private long ord;

-        SafeValuesIter(double[] lon, double[] lat) {
+        SafeValuesIter(BigDoubleArrayList lon, BigDoubleArrayList lat) {
             this.lon = lon;
             this.lat = lat;
         }
@@ -245,7 +242,7 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF

         @Override
         public GeoPoint next() {
-            GeoPoint value = new GeoPoint(lat[ord], lon[ord]);
+            GeoPoint value = new GeoPoint(lat.get(ord), lon.get(ord));
             ord = ordsIter.next();
             return value;
         }
@@ -258,10 +255,13 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
     */
    public static class SingleFixedSet extends GeoPointDoubleArrayAtomicFieldData {

+        private final double[] lon, lat;
         private final FixedBitSet set;

         public SingleFixedSet(double[] lon, double[] lat, int numDocs, FixedBitSet set) {
-            super(lon, lat, numDocs);
+            super(numDocs);
+            this.lon = lon;
+            this.lat = lat;
             this.set = set;
         }

@@ -357,8 +357,12 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
     */
    public static class Single extends GeoPointDoubleArrayAtomicFieldData {

+        private final double[] lon, lat;

         public Single(double[] lon, double[] lat, int numDocs) {
-            super(lon, lat, numDocs);
+            super(numDocs);
+            this.lon = lon;
+            this.lat = lat;
         }

         @Override
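Throughout this file the backing `double[]` arrays for lat/lon become `BigDoubleArrayList`, which is what lets a `long` ordinal address more than 2B coordinates. Below is a minimal sketch of the paged-array idea; it is an illustration only, the real `BigDoubleArrayList` in `org.elasticsearch.common.util` may differ in details such as page size and growth policy.

    // Illustrative paged double list indexed by long; not the actual implementation.
    class PagedDoubleList {
        private static final int PAGE_SHIFT = 14;              // 16384 doubles per page
        private static final int PAGE_SIZE = 1 << PAGE_SHIFT;
        private static final int PAGE_MASK = PAGE_SIZE - 1;

        private double[][] pages = new double[1][];
        private long size = 0;

        public void add(double value) {
            final int page = (int) (size >>> PAGE_SHIFT);
            final int slot = (int) (size & PAGE_MASK);
            if (page >= pages.length) {                        // grow the page directory only
                pages = java.util.Arrays.copyOf(pages, pages.length << 1);
            }
            if (pages[page] == null) {
                pages[page] = new double[PAGE_SIZE];
            }
            pages[page][slot] = value;
            ++size;
        }

        public double get(long index) {                        // index may exceed Integer.MAX_VALUE
            return pages[(int) (index >>> PAGE_SHIFT)][(int) (index & PAGE_MASK)];
        }
    }

Paging also avoids one huge contiguous allocation: each page is a small array, so growing never copies more than the page directory.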
@@ -19,16 +19,15 @@

 package org.elasticsearch.index.fielddata.plain;

-import gnu.trove.list.array.TDoubleArrayList;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.*;
-import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.ElasticSearchIllegalArgumentException;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.BigDoubleArrayList;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.fielddata.*;
 import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
@@ -45,7 +44,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
    public static class Builder implements IndexFieldData.Builder {

        @Override
-       public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
+       public IndexFieldData<?> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
            return new GeoPointDoubleArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
        }
    }
@@ -83,12 +82,12 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
            return GeoPointDoubleArrayAtomicFieldData.EMPTY;
        }
        // TODO: how can we guess the number of terms? numerics end up creating more terms per value...
-       final TDoubleArrayList lat = new TDoubleArrayList();
-       final TDoubleArrayList lon = new TDoubleArrayList();
+       final BigDoubleArrayList lat = new BigDoubleArrayList();
+       final BigDoubleArrayList lon = new BigDoubleArrayList();
        lat.add(0); // first "t" indicates null value
        lon.add(0); // first "t" indicates null value
-       final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
-       OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
+       final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
+       OrdinalsBuilder builder = new OrdinalsBuilder(terms.size(), reader.maxDoc(), acceptableTransientOverheadRatio);
        final CharsRef spare = new CharsRef();
        try {
            BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null));
@@ -113,7 +112,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
                double[] sLat = new double[reader.maxDoc()];
                double[] sLon = new double[reader.maxDoc()];
                for (int i = 0; i < sLat.length; i++) {
-                   int nativeOrdinal = ordinals.getOrd(i);
+                   long nativeOrdinal = ordinals.getOrd(i);
                    sLat[i] = lat.get(nativeOrdinal);
                    sLon[i] = lon.get(nativeOrdinal);
                }
@@ -125,8 +124,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
                }
            } else {
                return new GeoPointDoubleArrayAtomicFieldData.WithOrdinals(
-                       lon.toArray(new double[lon.size()]),
-                       lat.toArray(new double[lat.size()]),
+                       lon, lat,
                        reader.maxDoc(), build);
            }
        } finally {
@@ -141,7 +141,7 @@ public abstract class PackedArrayAtomicFieldData extends AtomicNumericFieldData
        }

        @Override
-       public long getValueByOrd(int ord) {
+       public long getValueByOrd(long ord) {
            return ord == 0 ? 0L : values.get(ord - 1);
        }
    }
@@ -156,7 +156,7 @@ public abstract class PackedArrayAtomicFieldData extends AtomicNumericFieldData
        }

        @Override
-       public double getValueByOrd(int ord) {
+       public double getValueByOrd(long ord) {
            return ord == 0 ? 0L : values.get(ord - 1);
        }

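`getValueByOrd` now takes a `long`, but the `ord == 0 ? 0L : values.get(ord - 1)` pattern is unchanged: ordinal 0 is reserved for documents without a value, so real values are stored shifted down by one. A tiny self-contained sketch of that convention, with a plain array standing in for the packed structure used by the real code:

    // Illustrative only: ordinal 0 means "missing"; stored values live at ord - 1.
    class OrdinalStore {
        private final long[] values; // a packed, long-indexed structure in the real code

        OrdinalStore(long[] values) {
            this.values = values;
        }

        long getValueByOrd(long ord) {
            return ord == 0 ? 0L : values[(int) (ord - 1)]; // 0L doubles as the missing value
        }
    }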
@@ -110,8 +110,8 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
        // longs is going to be monotonically increasing
        final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();

-       final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
-       OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
+       final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
+       OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
        try {
            BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
            BytesRef term;
@@ -161,6 +161,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume

            final long delta = maxValue - minValue;
            final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
+           final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
            final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);

            // there's sweet spot where due to low unique value count, using ordinals will consume less memory
@@ -177,7 +178,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
                sValues.fill(0, sValues.size(), missingValue);
            }
            for (int i = 0; i < reader.maxDoc(); i++) {
-               final int ord = ordinals.getOrd(i);
+               final long ord = ordinals.getOrd(i);
                if (ord > 0) {
                    sValues.set(i, values.get(ord - 1) - minValue);
                }
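After this change, `acceptable_overhead_ratio` only affects the final single-valued storage, where values are delta-encoded against `minValue` and packed with just `bitsRequired(delta)` bits per document. A sketch of that encoding using the Lucene 4.x `PackedInts` API; the array contents are made up for illustration:

    import org.apache.lucene.util.packed.PackedInts;

    public class DeltaPackedExample {
        public static void main(String[] args) {
            long[] raw = {1000L, 1002L, 1007L};       // illustrative values
            long minValue = 1000L;
            long maxValue = 1007L;
            // 3 bits per value instead of 64, because only the delta is stored
            int bitsRequired = PackedInts.bitsRequired(maxValue - minValue);
            PackedInts.Mutable packed = PackedInts.getMutable(raw.length, bitsRequired, PackedInts.DEFAULT);
            for (int i = 0; i < raw.length; i++) {
                packed.set(i, raw[i] - minValue);     // encode relative to minValue
            }
            long restored = packed.get(1) + minValue; // decode: 1002
            System.out.println(restored);
        }
    }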
@@ -23,6 +23,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.PagedBytes.Reader;
 import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+import org.elasticsearch.common.util.BigIntArray;
 import org.elasticsearch.index.fielddata.AtomicFieldData;
 import org.elasticsearch.index.fielddata.ScriptDocValues;
 import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
@@ -42,7 +43,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
    private final MonotonicAppendingLongBuffer termOrdToBytesOffset;
    protected final Ordinals ordinals;

-   private volatile int[] hashes;
+   private volatile BigIntArray hashes;
    private long size = -1;
    private final long readerBytesSize;

@@ -85,14 +86,14 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
        return size;
    }

-   private final int[] getHashes() {
+   private final BigIntArray getHashes() {
        if (hashes == null) {
-           int numberOfValues = (int) termOrdToBytesOffset.size();
-           int[] hashes = new int[numberOfValues];
+           long numberOfValues = termOrdToBytesOffset.size();
+           BigIntArray hashes = new BigIntArray(numberOfValues);
            BytesRef scratch = new BytesRef();
-           for (int i = 0; i < numberOfValues; i++) {
+           for (long i = 0; i < numberOfValues; i++) {
                bytes.fill(scratch, termOrdToBytesOffset.get(i));
-               hashes[i] = scratch.hashCode();
+               hashes.set(i, scratch.hashCode());
            }
            this.hashes = hashes;
        }
@@ -107,7 +108,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S

    @Override
    public org.elasticsearch.index.fielddata.BytesValues.WithOrdinals getHashedBytesValues() {
-       final int[] hashes = getHashes();
+       final BigIntArray hashes = getHashes();
        return ordinals.isMultiValued() ? new BytesValues.MultiHashed(hashes, bytes, termOrdToBytesOffset, ordinals.ordinals())
                : new BytesValues.SingleHashed(hashes, bytes, termOrdToBytesOffset, ordinals.ordinals());
    }
@@ -145,7 +146,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
        }

        @Override
-       public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
+       public BytesRef getValueScratchByOrd(long ord, BytesRef ret) {
            bytes.fill(ret, termOrdToBytesOffset.get(ord));
            return ret;
        }
@@ -163,7 +164,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S

        @Override
        public Iter getIter(int docId) {
-           int ord = ordinals.getOrd(docId);
+           long ord = ordinals.getOrd(docId);
            if (ord == 0) return Iter.Empty.INSTANCE;
            bytes.fill(scratch, termOrdToBytesOffset.get(ord));
            return iter.reset(scratch, ord);
@@ -172,9 +173,9 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
    }

    static final class SingleHashed extends Single {
-       private final int[] hashes;
+       private final BigIntArray hashes;

-       SingleHashed(int[] hashes, Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Docs ordinals) {
+       SingleHashed(BigIntArray hashes, Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Docs ordinals) {
            super(bytes, termOrdToBytesOffset, ordinals);
            this.hashes = hashes;
        }
@@ -183,16 +184,16 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
        protected Iter.Single newSingleIter() {
            return new Iter.Single() {
                public int hash() {
-                   return hashes[ord];
+                   return hashes.get(ord);
                }
            };
        }

        @Override
        public int getValueHashed(int docId, BytesRef ret) {
-           final int ord = ordinals.getOrd(docId);
+           final long ord = ordinals.getOrd(docId);
            getValueScratchByOrd(ord, ret);
-           return hashes[ord];
+           return hashes.get(ord);
        }

    }
@@ -216,9 +217,9 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S

    static final class MultiHashed extends Multi {

-       private final int[] hashes;
+       private final BigIntArray hashes;

-       MultiHashed(int[] hashes, Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Docs ordinals) {
+       MultiHashed(BigIntArray hashes, Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Docs ordinals) {
            super(bytes, termOrdToBytesOffset, ordinals);
            this.hashes = hashes;
        }
@@ -227,16 +228,16 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
        protected Iter.Multi newMultiIter() {
            return new Iter.Multi(this) {
                public int hash() {
-                   return hashes[ord];
+                   return hashes.get(ord);
                }
            };
        }

        @Override
        public int getValueHashed(int docId, BytesRef ret) {
-           int ord = ordinals.getOrd(docId);
+           long ord = ordinals.getOrd(docId);
            getValueScratchByOrd(ord, ret);
-           return hashes[ord];
+           return hashes.get(ord);
        }

    }
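`getHashes()` keeps its lazy-initialization shape even though the cache is now a `BigIntArray`: the field is `volatile`, there is no locking, and the computation is deterministic, so the worst a race can do is build the same table twice. The pattern in isolation, as a sketch rather than the field-data code itself:

    // Racy single-check lazy init: safe because the field is volatile, the table
    // is never mutated after publication, and recomputation is deterministic.
    abstract class HashCache {
        private volatile int[] hashes; // a BigIntArray in the real code, to allow > 2B entries

        protected abstract int computeHash(long ord);

        final int[] getHashes(int numberOfValues) {
            int[] h = hashes;
            if (h == null) {
                h = new int[numberOfValues];
                for (int i = 0; i < numberOfValues; i++) {
                    h[i] = computeHash(i);
                }
                hashes = h; // publish; a concurrent thread may have done the same work
            }
            return h;
        }
    }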
@@ -23,7 +23,6 @@ import org.apache.lucene.index.*;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
-import org.apache.lucene.util.packed.PackedInts;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.fielddata.FieldDataType;
@@ -61,39 +60,23 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB

        final PagedBytes bytes = new PagedBytes(15);

-       int maxDoc = reader.maxDoc();
-       final int termCountHardLimit;
-       if (maxDoc == Integer.MAX_VALUE) {
-           termCountHardLimit = Integer.MAX_VALUE;
-       } else {
-           termCountHardLimit = maxDoc + 1;
-       }
-
-       // Try for coarse estimate for number of bits; this
-       // should be an underestimate most of the time, which
-       // is fine -- GrowableWriter will reallocate as needed
-       long numUniqueTerms = terms.size();
-       if (numUniqueTerms != -1L) {
-           if (numUniqueTerms > termCountHardLimit) {
-               // app is misusing the API (there is more than
-               // one term per doc); in this case we make best
-               // effort to load what we can (see LUCENE-2142)
-               numUniqueTerms = termCountHardLimit;
-           }
-       }
-
        final MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
        termOrdToBytesOffset.add(0); // first ord is reserved for missing values
-       boolean preDefineBitsRequired = regex == null && frequency == null;
-       final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
-       OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio);
+       final long numTerms;
+       if (regex == null && frequency == null) {
+           numTerms = terms.size();
+       } else {
+           numTerms = -1;
+       }
+       final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
+       OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio);
        try {
            // 0 is reserved for "unset"
            bytes.copyUsingLengthPrefix(new BytesRef());
            TermsEnum termsEnum = filter(terms, reader);
            DocsEnum docsEnum = null;
            for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
-               final int termOrd = builder.nextOrdinal();
+               final long termOrd = builder.nextOrdinal();
                assert termOrd == termOrdToBytesOffset.size();
                termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
                docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
@@ -27,6 +27,8 @@ import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.UnicodeUtil;
 import org.elasticsearch.cache.recycler.CacheRecycler;
 import org.elasticsearch.common.collect.BoundedTreeSet;
+import org.elasticsearch.common.util.IntArray;
+import org.elasticsearch.common.util.IntArrays;
 import org.elasticsearch.index.fielddata.BytesValues;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.ordinals.Ordinals;
@@ -114,7 +116,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
                BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it... (can we get around this?)
                int count = 0;
                do {
-                   count += agg.counts[agg.position];
+                   count += agg.counts.get(agg.position);
                    if (agg.nextPosition()) {
                        agg = queue.updateTop();
                    } else {
@@ -144,12 +146,6 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
                list[i] = (InternalStringTermsFacet.TermEntry) ordered.pop();
            }

-           for (ReaderAggregator aggregator : aggregators) {
-               if (aggregator.counts.length > ordinalsCacheAbove) {
-                   cacheRecycler.pushIntArray(aggregator.counts);
-               }
-           }
-
            return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
        }

@@ -160,7 +156,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
            BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it... (can we work around that?)
            int count = 0;
            do {
-               count += agg.counts[agg.position];
+               count += agg.counts.get(agg.position);
                if (agg.nextPosition()) {
                    agg = queue.updateTop();
                } else {
@@ -186,13 +182,6 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
            }
        }

-       for (ReaderAggregator aggregator : aggregators) {
-           if (aggregator.counts.length > ordinalsCacheAbove) {
-               cacheRecycler.pushIntArray(aggregator.counts);
-           }
-       }
-
        return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing, total);
    }

@@ -207,8 +196,8 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
        @Override
        public void setNextReader(AtomicReaderContext context) throws IOException {
            if (current != null) {
-               missing += current.counts[0];
-               total += current.total - current.counts[0];
+               missing += current.counts.get(0);
+               total += current.total - current.counts.get(0);
                if (current.values.ordinals().getNumOrds() > 0) {
                    aggregators.add(current);
                }
@@ -221,7 +210,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
        @Override
        public void collect(int doc) throws IOException {
            Iter iter = ordinals.getIter(doc);
-           int ord = iter.next();
+           long ord = iter.next();
            current.onOrdinal(doc, ord);
            while ((ord = iter.next()) != 0) {
                current.onOrdinal(doc, ord);
@@ -231,8 +220,8 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
        @Override
        public void postCollection() {
            if (current != null) {
-               missing += current.counts[0];
-               total += current.total - current.counts[0];
+               missing += current.counts.get(0);
+               total += current.total - current.counts.get(0);
                // if we have values for this one, add it
                if (current.values.ordinals().getNumOrds() > 0) {
                    aggregators.add(current);
@@ -247,26 +236,21 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
    public static final class ReaderAggregator {

        final BytesValues.WithOrdinals values;
-       final int[] counts;
+       final IntArray counts;

-       int position = 0;
+       long position = 0;
        BytesRef current;
        int total;
-       private final int maxOrd;
+       private final long maxOrd;

        public ReaderAggregator(BytesValues.WithOrdinals values, int ordinalsCacheLimit, CacheRecycler cacheRecycler) {
            this.values = values;
            this.maxOrd = values.ordinals().getMaxOrd();
-           if (maxOrd > ordinalsCacheLimit) {
-               this.counts = cacheRecycler.popIntArray(maxOrd);
-           } else {
-               this.counts = new int[maxOrd];
-           }
+           this.counts = IntArrays.allocate(maxOrd);
        }

-       final void onOrdinal(int docId, int ordinal) {
-           counts[ordinal]++;
+       final void onOrdinal(int docId, long ordinal) {
+           counts.increment(ordinal, 1);
            total++;
        }

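With counts held in an `IntArray` indexed by a `long` ordinal, the aggregator no longer recycles raw `int[]` buffers; `IntArrays.allocate(maxOrd)` sizes the counter table to the segment's ordinal range. The counting itself is unchanged in spirit; a sketch with a plain array standing in for `IntArray` (slot 0 collects docs with no value because 0 is the null ordinal):

    // Illustrative per-segment ordinal counting, as ReaderAggregator does it.
    class OrdinalCounter {
        private final int[] counts; // an IntArray in the real code, to allow > 2B slots

        OrdinalCounter(int maxOrd) {
            this.counts = new int[maxOrd];
        }

        void onOrdinal(long ordinal) {
            counts[(int) ordinal]++; // counts.increment(ordinal, 1) on an IntArray
        }

        int missing() {
            return counts[0];        // ordinal 0 accumulates the "no value" docs
        }
    }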
@@ -82,7 +82,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
        AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
        BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
        Docs ordinals = bytesValues.ordinals();
-       assertThat(2, equalTo(ordinals.getNumOrds()));
+       assertThat(2L, equalTo(ordinals.getNumOrds()));
        assertThat(1000, equalTo(ordinals.getNumDocs()));
        assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10"));
        assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100"));
@@ -95,7 +95,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
        AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
        BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
        Docs ordinals = bytesValues.ordinals();
-       assertThat(1, equalTo(ordinals.getNumOrds()));
+       assertThat(1L, equalTo(ordinals.getNumOrds()));
        assertThat(1000, equalTo(ordinals.getNumDocs()));
        assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("5"));
    }
@@ -108,7 +108,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
        AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
        BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
        Docs ordinals = bytesValues.ordinals();
-       assertThat(2, equalTo(ordinals.getNumOrds()));
+       assertThat(2L, equalTo(ordinals.getNumOrds()));
        assertThat(1000, equalTo(ordinals.getNumDocs()));
        assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10"));
        assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100"));
@@ -122,7 +122,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
        AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
        BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
        Docs ordinals = bytesValues.ordinals();
-       assertThat(2, equalTo(ordinals.getNumOrds()));
+       assertThat(2L, equalTo(ordinals.getNumOrds()));
        assertThat(1000, equalTo(ordinals.getNumDocs()));
        assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10"));
        assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100"));
@@ -139,7 +139,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
        AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
        BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
        Docs ordinals = bytesValues.ordinals();
-       assertThat(1, equalTo(ordinals.getNumOrds()));
+       assertThat(1L, equalTo(ordinals.getNumOrds()));
        assertThat(1000, equalTo(ordinals.getNumDocs()));
        assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("100"));
    }
@@ -184,7 +184,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
        AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
        BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
        Docs ordinals = bytesValues.ordinals();
-       assertThat(1, equalTo(ordinals.getNumOrds()));
+       assertThat(1L, equalTo(ordinals.getNumOrds()));
        assertThat(1000, equalTo(ordinals.getNumDocs()));
        assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("5"));
    }
@@ -196,7 +196,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
        AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
        BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
        Docs ordinals = bytesValues.ordinals();
-       assertThat(2, equalTo(ordinals.getNumOrds()));
+       assertThat(2L, equalTo(ordinals.getNumOrds()));
        assertThat(1000, equalTo(ordinals.getNumDocs()));
        assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10"));
        assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("5"));
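The only change in these tests is widening the expected literals (`2` becomes `2L`): hamcrest's `equalTo` compares with `Object.equals`, and a boxed `Integer` never equals a boxed `Long`, so once `getNumOrds()` returns `long` the old assertions could no longer match. For example:

    assertThat(2L, equalTo(ordinals.getNumOrds())); // Long vs long: passes when the count is 2
    // assertThat(2, equalTo(ordinals.getNumOrds())); // Integer vs Long: never equal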
@@ -1,35 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.test.unit.index.fielddata.ordinals;
-
-import org.elasticsearch.common.settings.ImmutableSettings;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals;
-import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
-
-/**
- */
-public class FlatMultiOrdinalsTests extends MultiOrdinalsTests {
-
-    @Override
-    protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) {
-        settings.put("multi_ordinals", "flat");
-        return builder.build(settings.build());
-    }
-}
@@ -19,8 +19,9 @@

 package org.elasticsearch.test.unit.index.fielddata.ordinals;

-import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LongsRef;
 import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.index.fielddata.ordinals.MultiOrdinals;
 import org.elasticsearch.index.fielddata.ordinals.Ordinals;
 import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
 import org.testng.annotations.Test;
@@ -30,18 +31,20 @@ import java.util.*;

 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.greaterThan;

 /**
  */
-public abstract class MultiOrdinalsTests {
+public class MultiOrdinalsTests {

    protected final Ordinals creationMultiOrdinals(OrdinalsBuilder builder) {
        return this.creationMultiOrdinals(builder, ImmutableSettings.builder());
    }

-   protected abstract Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings);
+   protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) {
+       return builder.build(settings.build());
+   }

    @Test
    public void testRandomValues() throws IOException {
@@ -74,7 +77,7 @@ public abstract class MultiOrdinalsTests {
                return 1;
            }
        });
-       int lastOrd = -1;
+       long lastOrd = -1;
        for (OrdAndId ordAndId : ordsAndIds) {
            if (lastOrd != ordAndId.ord) {
                lastOrd = ordAndId.ord;
@@ -105,27 +108,27 @@ public abstract class MultiOrdinalsTests {
        Ordinals ords = creationMultiOrdinals(builder);
        Ordinals.Docs docs = ords.ordinals();
        int docId = ordsAndIds.get(0).id;
-       List<Integer> docOrds = new ArrayList<Integer>();
+       List<Long> docOrds = new ArrayList<Long>();
        for (OrdAndId ordAndId : ordsAndIds) {
            if (docId == ordAndId.id) {
                docOrds.add(ordAndId.ord);
            } else {
                if (!docOrds.isEmpty()) {
                    assertThat(docs.getOrd(docId), equalTo(docOrds.get(0)));
-                   IntsRef ref = docs.getOrds(docId);
+                   LongsRef ref = docs.getOrds(docId);
                    assertThat(ref.offset, equalTo(0));

                    for (int i = ref.offset; i < ref.length; i++) {
-                       assertThat(ref.ints[i], equalTo(docOrds.get(i)));
+                       assertThat(ref.longs[i], equalTo(docOrds.get(i)));
                    }
-                   final int[] array = new int[docOrds.size()];
+                   final long[] array = new long[docOrds.size()];
                    for (int i = 0; i < array.length; i++) {
                        array[i] = docOrds.get(i);
                    }
                    assertIter(docs.getIter(docId), array);
                }
                for (int i = docId + 1; i < ordAndId.id; i++) {
-                   assertThat(docs.getOrd(i), equalTo(0));
+                   assertThat(docs.getOrd(i), equalTo(0L));
                }
                docId = ordAndId.id;
                docOrds.clear();
@@ -137,10 +140,10 @@ public abstract class MultiOrdinalsTests {
    }

    public static class OrdAndId {
-       final int ord;
+       final long ord;
        final int id;

-       public OrdAndId(int ord, int id) {
+       public OrdAndId(long ord, int id) {
            this.ord = ord;
            this.id = id;
        }
@@ -150,7 +153,7 @@ public abstract class MultiOrdinalsTests {
            final int prime = 31;
            int result = 1;
            result = prime * result + id;
-           result = prime * result + ord;
+           result = prime * result + (int) ord;
            return result;
        }

@@ -174,7 +177,7 @@ public abstract class MultiOrdinalsTests {
    @Test
    public void testOrdinals() throws Exception {
        int maxDoc = 7;
-       int maxOrds = 32;
+       long maxOrds = 32;
        OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc);
        builder.nextOrdinal(); // 1
        builder.addDoc(1).addDoc(4).addDoc(5).addDoc(6);
@@ -186,97 +189,99 @@ public abstract class MultiOrdinalsTests {
        builder.addDoc(0).addDoc(4).addDoc(5).addDoc(6);
        builder.nextOrdinal(); // 5
        builder.addDoc(4).addDoc(5).addDoc(6);
-       int ord = builder.nextOrdinal(); // 6
+       long ord = builder.nextOrdinal(); // 6
        builder.addDoc(4).addDoc(5).addDoc(6);
-       for (int i = ord; i < maxOrds; i++) {
+       for (long i = ord; i < maxOrds; i++) {
            builder.nextOrdinal();
            builder.addDoc(5).addDoc(6);
        }

+       long[][] ordinalPlan = new long[][] {
+               {2, 4},
+               {1},
+               {3},
+               {},
+               {1, 3, 4, 5, 6},
+               {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},
+               {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}
+       };

        Ordinals ordinals = creationMultiOrdinals(builder);
        Ordinals.Docs docs = ordinals.ordinals();
-       assertThat(docs.getNumDocs(), equalTo(maxDoc));
-       assertThat(docs.getNumOrds(), equalTo(maxOrds));
-       assertThat(docs.getMaxOrd(), equalTo(maxOrds + 1)); // Includes null ord
-       assertThat(docs.isMultiValued(), equalTo(true));
-       assertThat(ordinals.getMemorySizeInBytes(), greaterThan(0l));
-
-       // Document 1
-       assertThat(docs.getOrd(0), equalTo(2));
-       IntsRef ref = docs.getOrds(0);
-       assertThat(ref.offset, equalTo(0));
-       assertThat(ref.ints[0], equalTo(2));
-       assertThat(ref.ints[1], equalTo(4));
-       assertThat(ref.length, equalTo(2));
-       assertIter(docs.getIter(0), 2, 4);
-
-       // Document 2
-       assertThat(docs.getOrd(1), equalTo(1));
-       ref = docs.getOrds(1);
-       assertThat(ref.offset, equalTo(0));
-       assertThat(ref.ints[0], equalTo(1));
-       assertThat(ref.length, equalTo(1));
-       assertIter(docs.getIter(1), 1);
-
-       // Document 3
-       assertThat(docs.getOrd(2), equalTo(3));
-       ref = docs.getOrds(2);
-       assertThat(ref.offset, equalTo(0));
-       assertThat(ref.ints[0], equalTo(3));
-       assertThat(ref.length, equalTo(1));
-       assertIter(docs.getIter(2), 3);
-
-       // Document 4
-       assertThat(docs.getOrd(3), equalTo(0));
-       ref = docs.getOrds(3);
-       assertThat(ref.offset, equalTo(0));
-       assertThat(ref.length, equalTo(0));
-       assertIter(docs.getIter(3));
-
-       // Document 5
-       assertThat(docs.getOrd(4), equalTo(1));
-       ref = docs.getOrds(4);
-       assertThat(ref.offset, equalTo(0));
-       assertThat(ref.ints[0], equalTo(1));
-       assertThat(ref.ints[1], equalTo(3));
-       assertThat(ref.ints[2], equalTo(4));
-       assertThat(ref.ints[3], equalTo(5));
-       assertThat(ref.ints[4], equalTo(6));
-       assertThat(ref.length, equalTo(5));
-       assertIter(docs.getIter(4), 1, 3, 4, 5, 6);
-
-       // Document 6
-       assertThat(docs.getOrd(5), equalTo(1));
-       ref = docs.getOrds(5);
-       assertThat(ref.offset, equalTo(0));
-       int[] expectedOrds = new int[maxOrds];
-       for (int i = 0; i < maxOrds; i++) {
-           expectedOrds[i] = i + 1;
-           assertThat(ref.ints[i], equalTo(i + 1));
-       }
-       assertIter(docs.getIter(5), expectedOrds);
-       assertThat(ref.length, equalTo(maxOrds));
-
-       // Document 7
-       assertThat(docs.getOrd(6), equalTo(1));
-       ref = docs.getOrds(6);
-       assertThat(ref.offset, equalTo(0));
-       expectedOrds = new int[maxOrds];
-       for (int i = 0; i < maxOrds; i++) {
-           expectedOrds[i] = i + 1;
-           assertThat(ref.ints[i], equalTo(i + 1));
-       }
-       assertIter(docs.getIter(6), expectedOrds);
-       assertThat(ref.length, equalTo(maxOrds));
+       assertEquals(docs, ordinalPlan);
    }

-   protected static void assertIter(Ordinals.Docs.Iter iter, int... expectedOrdinals) {
-       for (int expectedOrdinal : expectedOrdinals) {
+   protected static void assertIter(Ordinals.Docs.Iter iter, long... expectedOrdinals) {
+       for (long expectedOrdinal : expectedOrdinals) {
            assertThat(iter.next(), equalTo(expectedOrdinal));
        }
-       assertThat(iter.next(), equalTo(0)); // Last one should always be 0
-       assertThat(iter.next(), equalTo(0)); // Just checking it stays 0
+       assertThat(iter.next(), equalTo(0L)); // Last one should always be 0
+       assertThat(iter.next(), equalTo(0L)); // Just checking it stays 0
+   }
+
+   @Test
+   public void testMultiValuesDocsWithOverlappingStorageArrays() throws Exception {
+       int maxDoc = 7;
+       long maxOrds = 15;
+       OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc);
+       for (int i = 0; i < maxOrds; i++) {
+           builder.nextOrdinal();
+           if (i < 10) {
+               builder.addDoc(0);
+           }
+           builder.addDoc(1);
+           if (i == 0) {
+               builder.addDoc(2);
+           }
+           if (i < 5) {
+               builder.addDoc(3);
+
+           }
+           if (i < 6) {
+               builder.addDoc(4);
+
+           }
+           if (i == 1) {
+               builder.addDoc(5);
+           }
+           if (i < 10) {
+               builder.addDoc(6);
+           }
+       }
+
+       long[][] ordinalPlan = new long[][] {
+               {1,2,3,4,5,6,7,8,9,10},
+               {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15},
+               {1},
+               {1,2,3,4,5},
+               {1,2,3,4,5,6},
+               {2},
+               {1,2,3,4,5,6,7,8,9,10}
+       };
+
+       Ordinals ordinals = new MultiOrdinals(builder);
+       Ordinals.Docs docs = ordinals.ordinals();
+       assertEquals(docs, ordinalPlan);
+   }
+
+   private void assertEquals(Ordinals.Docs docs, long[][] ordinalPlan) {
+       long numOrds = 0;
+       for (int doc = 0; doc < ordinalPlan.length; ++doc) {
+           if (ordinalPlan[doc].length > 0) {
+               numOrds = Math.max(numOrds, ordinalPlan[doc][ordinalPlan[doc].length - 1]);
+           }
+       }
+       assertThat(docs.getNumDocs(), equalTo(ordinalPlan.length));
+       assertThat(docs.getNumOrds(), equalTo(numOrds)); // Includes null ord
+       assertThat(docs.getMaxOrd(), equalTo(numOrds + 1));
+       assertThat(docs.isMultiValued(), equalTo(true));
+       for (int doc = 0; doc < ordinalPlan.length; ++doc) {
+           LongsRef ref = docs.getOrds(doc);
+           assertThat(ref.offset, equalTo(0));
+           long[] ords = ordinalPlan[doc];
+           assertThat(ref, equalTo(new LongsRef(ords, 0, ords.length)));
+           assertIter(docs.getIter(doc), ords);
+       }
    }

 }
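The tests above exercise the `OrdinalsBuilder` API directly. A condensed usage sketch of the calls they rely on (`nextOrdinal` returns the new ordinal, starting at 1 because 0 is the null ordinal; `addDoc` assigns the current ordinal to a document); the class wrapper and printed value are illustrative:

    import org.elasticsearch.common.settings.ImmutableSettings;
    import org.elasticsearch.index.fielddata.ordinals.Ordinals;
    import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;

    public class OrdinalsBuilderUsage {
        public static void main(String[] args) throws Exception {
            OrdinalsBuilder builder = new OrdinalsBuilder(3 /* maxDoc */);
            long first = builder.nextOrdinal();  // 1; ordinal 0 is reserved for "missing"
            builder.addDoc(0).addDoc(2);         // docs 0 and 2 carry ordinal 1
            long second = builder.nextOrdinal(); // 2
            builder.addDoc(2);                   // doc 2 becomes multi-valued: {1, 2}
            Ordinals ordinals = builder.build(ImmutableSettings.builder().build());
            Ordinals.Docs docs = ordinals.ordinals();
            System.out.println(docs.getOrd(2));  // 1, the first ordinal of doc 2
        }
    }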
@@ -40,9 +40,9 @@ public class SingleOrdinalsTests {
    public void testSvValues() throws IOException {
        int numDocs = 1000000;
        int numOrdinals = numDocs / 4;
-       Map<Integer, Integer> controlDocToOrdinal = new HashMap<Integer, Integer>();
+       Map<Integer, Long> controlDocToOrdinal = new HashMap<Integer, Long>();
        OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);
-       int ordinal = builder.nextOrdinal();
+       long ordinal = builder.nextOrdinal();
        for (int doc = 0; doc < numDocs; doc++) {
            if (doc % numOrdinals == 0) {
                ordinal = builder.nextOrdinal();
@@ -56,7 +56,7 @@ public class SingleOrdinalsTests {
        Ordinals.Docs docs = ords.ordinals();

        assertThat(controlDocToOrdinal.size(), equalTo(docs.getNumDocs()));
-       for (Map.Entry<Integer, Integer> entry : controlDocToOrdinal.entrySet()) {
+       for (Map.Entry<Integer, Long> entry : controlDocToOrdinal.entrySet()) {
            assertThat(entry.getValue(), equalTo(docs.getOrd(entry.getKey())));
        }

@@ -1,164 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.test.unit.index.fielddata.ordinals;
-
-import org.apache.lucene.util.IntsRef;
-import org.elasticsearch.ElasticSearchException;
-import org.elasticsearch.common.settings.ImmutableSettings;
-import org.elasticsearch.common.settings.ImmutableSettings.Builder;
-import org.elasticsearch.index.fielddata.ordinals.Ordinals;
-import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
-import org.elasticsearch.index.fielddata.ordinals.SparseMultiArrayOrdinals;
-import org.testng.annotations.Test;
-
-import static org.hamcrest.MatcherAssert.assertThat;
-import static org.hamcrest.Matchers.equalTo;
-import static org.testng.Assert.fail;
-
-/**
- */
-public class SparseMultiOrdinalsTests extends MultiOrdinalsTests {
-
-    @Override
-    protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) {
-        settings.put("multi_ordinals", "sparse");
-        return builder.build(settings.build());
-    }
-
-    @Test
-    public void testMultiValuesSurpassOrdinalsLimit() throws Exception {
-        OrdinalsBuilder builder = new OrdinalsBuilder(2);
-        int maxOrds = 128;
-        for (int i = 0; i < maxOrds; i++) {
-            builder.nextOrdinal();
-            if (i == 2 || i == 4) {
-                builder.addDoc(0);
-            }
-            builder.addDoc(1);
-        }
-
-        try {
-            Builder builder2 = ImmutableSettings.builder();
-            builder2.put("multi_ordinals_max_docs", 64);
-            creationMultiOrdinals(builder, builder2);
-            fail("Exception should have been throwed");
-        } catch (ElasticSearchException e) {
-        }
-    }
-
-    @Test
-    public void testMultiValuesDocsWithOverlappingStorageArrays() throws Exception {
-        int maxDoc = 7;
-        int maxOrds = 15;
-        OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc);
-        for (int i = 0; i < maxOrds; i++) {
-            builder.nextOrdinal();
-            if (i < 10) {
-                builder.addDoc(0);
-            }
-            builder.addDoc(1);
-            if (i == 0) {
-                builder.addDoc(2);
-            }
-            if (i < 5) {
-                builder.addDoc(3);
-            }
-            if (i < 6) {
-                builder.addDoc(4);
-            }
-            if (i == 1) {
-                builder.addDoc(5);
-            }
-            if (i < 10) {
-                builder.addDoc(6);
-            }
-        }
-
-        Ordinals ordinals = new SparseMultiArrayOrdinals(builder, 64);
-        Ordinals.Docs docs = ordinals.ordinals();
-        assertThat(docs.getNumDocs(), equalTo(maxDoc));
-        assertThat(docs.getNumOrds(), equalTo(maxOrds)); // Includes null ord
-        assertThat(docs.isMultiValued(), equalTo(true));
-
-        // Document 1
-        assertThat(docs.getOrd(0), equalTo(1));
-        IntsRef ref = docs.getOrds(0);
-        assertThat(ref.offset, equalTo(0));
-        for (int i = 0; i < 10; i++) {
-            assertThat(ref.ints[i], equalTo(i + 1));
-        }
-        assertThat(ref.length, equalTo(10));
-
-        // Document 2
-        assertThat(docs.getOrd(1), equalTo(1));
-        ref = docs.getOrds(1);
-        assertThat(ref.offset, equalTo(0));
-        for (int i = 0; i < 15; i++) {
-            assertThat(ref.ints[i], equalTo(i + 1));
-        }
-        assertThat(ref.length, equalTo(15));
-
-        // Document 3
-        assertThat(docs.getOrd(2), equalTo(1));
-        ref = docs.getOrds(2);
-        assertThat(ref.offset, equalTo(0));
-        assertThat(ref.ints[0], equalTo(1));
-        assertThat(ref.length, equalTo(1));
-
-        // Document 4
-        assertThat(docs.getOrd(3), equalTo(1));
-        ref = docs.getOrds(3);
-        assertThat(ref.offset, equalTo(0));
-        for (int i = 0; i < 5; i++) {
-            assertThat(ref.ints[i], equalTo(i + 1));
-        }
-        assertThat(ref.length, equalTo(5));
-
-        // Document 5
-        assertThat(docs.getOrd(4), equalTo(1));
-        ref = docs.getOrds(4);
-        assertThat(ref.offset, equalTo(0));
-        for (int i = 0; i < 6; i++) {
-            assertThat(ref.ints[i], equalTo(i + 1));
-        }
-        assertThat(ref.length, equalTo(6));
-
-        // Document 6
-        assertThat(docs.getOrd(5), equalTo(2));
-        ref = docs.getOrds(5);
-        assertThat(ref.offset, equalTo(0));
-        assertThat(ref.ints[0], equalTo(2));
-        assertThat(ref.length, equalTo(1));
-
-        // Document 7
-        assertThat(docs.getOrd(6), equalTo(1));
-        ref = docs.getOrds(6);
-        assertThat(ref.offset, equalTo(0));
-        for (int i = 0; i < 10; i++) {
-            assertThat(ref.ints[i], equalTo(i + 1));
-        }
-        assertThat(ref.length, equalTo(10));
-    }
-}
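The sparse multi-array storage deleted above is what this commit supersedes with Lucene's PackedInts-based structures (see the XAbstractPagedMutable copy earlier in this change). A rough, hypothetical sketch of the bit-packing idea follows; the class name and API are illustrative only, not Lucene's actual PackedInts interface:

// Hypothetical sketch of packed ordinal storage: each ordinal occupies only
// as many bits as the largest ordinal requires, instead of a full int/long.
public final class PackedOrdinals {
    private final long[] blocks;
    private final int bitsPerValue;
    private final long mask;

    public PackedOrdinals(long valueCount, long maxOrd) {
        // Smallest width that can represent maxOrd (at least 1 bit).
        this.bitsPerValue = Math.max(1, 64 - Long.numberOfLeadingZeros(maxOrd));
        this.mask = bitsPerValue == 64 ? -1L : (1L << bitsPerValue) - 1;
        // A real implementation pages this to avoid one huge array.
        this.blocks = new long[(int) ((valueCount * bitsPerValue + 63) / 64)];
    }

    // Assumes each slot is written once, as when loading field data.
    public void set(long index, long ord) {
        long bitPos = index * bitsPerValue;
        int block = (int) (bitPos >>> 6);
        int shift = (int) (bitPos & 63);
        blocks[block] |= (ord & mask) << shift;
        if (shift + bitsPerValue > 64) { // value straddles two blocks
            blocks[block + 1] |= (ord & mask) >>> (64 - shift);
        }
    }

    public long get(long index) {
        long bitPos = index * bitsPerValue;
        int block = (int) (bitPos >>> 6);
        int shift = (int) (bitPos & 63);
        long value = blocks[block] >>> shift;
        if (shift + bitsPerValue > 64) { // reassemble the straddled value
            value |= blocks[block + 1] << (64 - shift);
        }
        return value & mask;
    }
}

Under this sketch, with maxOrd = 1000 each ordinal takes 10 bits, so one million single-valued docs need roughly 1.25 MB instead of the 8 MB a long[] would use: the memory effect the commit message describes, especially where the ordinals mapping is large relative to the values.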