Make field data able to support more than 2B ordinals per segment.

Although segments are limited to 2B documents, there is no limit on the number
of unique values that a segment may store. This commit replaces 'int' with
'long' everywhere a number is used to represent an ordinal and modifies the
data-structures used to store ordinals so that they can actually support more
than 2B ordinals per segment.

This commit also improves the memory usage of the multi-ordinals data-structures
and the transient memory required to build them (OrdinalsBuilder) by using
Lucene's PackedInts data-structures. In the end, loading the ordinals mapping
from disk may be a little slower, field-data-based features such as faceting may
be slightly slower or faster depending on whether being nicer to the CPU caches
balances the overhead of the additional abstraction, and memory usage should be
better in all cases, especially when the size of the ordinals mapping is not
negligible compared to the size of the values (numeric data for example).

Close #3189
This commit is contained in:
Adrien Grand 2013-07-08 16:12:12 +02:00
parent 4d05c9cfd5
commit 12d9268db2
41 changed files with 1775 additions and 1334 deletions
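
Before the individual files, a minimal sketch (illustrative names, not part of the diff) of the page/offset arithmetic that the structures below rely on, so that long-addressed data can live in int-addressed pages:

public class PageMathSketch {
    public static void main(String[] args) {
        final int pageShift = 12;                        // pages of 2^12 = 4096 values
        final int pageMask = (1 << pageShift) - 1;
        final long ord = 3_000_000_000L;                 // beyond Integer.MAX_VALUE
        final int pageIndex = (int) (ord >>> pageShift); // which page holds the value
        final int indexInPage = (int) (ord & pageMask);  // slot inside that page
        System.out.println(pageIndex + " / " + indexInPage);
    }
}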


@ -0,0 +1,171 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import static org.apache.lucene.util.packed.XPackedInts.checkBlockSize;
import static org.apache.lucene.util.packed.XPackedInts.numBlocks;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Base implementation for {@link XPagedMutable} and {@link PagedGrowableWriter}.
* @lucene.internal
*/
abstract class XAbstractPagedMutable<T extends XAbstractPagedMutable<T>> {
static {
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
}
static final int MIN_BLOCK_SIZE = 1 << 6;
static final int MAX_BLOCK_SIZE = 1 << 30;
final long size;
final int pageShift;
final int pageMask;
final PackedInts.Mutable[] subMutables;
final int bitsPerValue;
XAbstractPagedMutable(int bitsPerValue, long size, int pageSize) {
this.bitsPerValue = bitsPerValue;
this.size = size;
pageShift = checkBlockSize(pageSize, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
pageMask = pageSize - 1;
final int numPages = numBlocks(size, pageSize);
subMutables = new PackedInts.Mutable[numPages];
}
protected final void fillPages() {
final int numPages = numBlocks(size, pageSize());
for (int i = 0; i < numPages; ++i) {
// do not allocate for more entries than necessary on the last page
final int valueCount = i == numPages - 1 ? lastPageSize(size) : pageSize();
subMutables[i] = newMutable(valueCount, bitsPerValue);
}
}
protected abstract PackedInts.Mutable newMutable(int valueCount, int bitsPerValue);
final int lastPageSize(long size) {
final int sz = indexInPage(size);
return sz == 0 ? pageSize() : sz;
}
final int pageSize() {
return pageMask + 1;
}
/** The number of values. */
public final long size() {
return size;
}
final int pageIndex(long index) {
return (int) (index >>> pageShift);
}
final int indexInPage(long index) {
return (int) index & pageMask;
}
/** Get value at <code>index</code>. */
public final long get(long index) {
assert index >= 0 && index < size;
final int pageIndex = pageIndex(index);
final int indexInPage = indexInPage(index);
return subMutables[pageIndex].get(indexInPage);
}
/** Set value at <code>index</code>. */
public final void set(long index, long value) {
assert index >= 0 && index < size;
final int pageIndex = pageIndex(index);
final int indexInPage = indexInPage(index);
subMutables[pageIndex].set(indexInPage, value);
}
protected long baseRamBytesUsed() {
return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ RamUsageEstimator.NUM_BYTES_LONG
+ 3 * RamUsageEstimator.NUM_BYTES_INT;
}
/** Return the number of bytes used by this object. */
public long ramBytesUsed() {
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed());
bytesUsed += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * subMutables.length);
for (PackedInts.Mutable gw : subMutables) {
bytesUsed += gw.ramBytesUsed();
}
return bytesUsed;
}
protected abstract T newUnfilledCopy(long newSize);
/** Create a new copy of size <code>newSize</code> based on the content of
* this buffer. This method is much more efficient than creating a new
* instance and copying values one by one. */
public final T resize(long newSize) {
final T copy = newUnfilledCopy(newSize);
final int numCommonPages = Math.min(copy.subMutables.length, subMutables.length);
final long[] copyBuffer = new long[1024];
for (int i = 0; i < copy.subMutables.length; ++i) {
final int valueCount = i == copy.subMutables.length - 1 ? lastPageSize(newSize) : pageSize();
final int bpv = i < numCommonPages ? subMutables[i].getBitsPerValue() : this.bitsPerValue;
copy.subMutables[i] = newMutable(valueCount, bpv);
if (i < numCommonPages) {
final int copyLength = Math.min(valueCount, subMutables[i].size());
XPackedInts.copy(subMutables[i], 0, copy.subMutables[i], 0, copyLength, copyBuffer);
}
}
return copy;
}
/** Similar to {@link ArrayUtil#grow(long[], int)}. */
public final T grow(long minSize) {
assert minSize >= 0;
if (minSize <= size()) {
@SuppressWarnings("unchecked")
final T result = (T) this;
return result;
}
long extra = minSize >>> 3;
if (extra < 3) {
extra = 3;
}
final long newSize = minSize + extra;
return resize(newSize);
}
/** Similar to {@link ArrayUtil#grow(long[])}. */
public final T grow() {
return grow(size() + 1);
}
@Override
public final String toString() {
return getClass().getSimpleName() + "(size=" + size() + ",pageSize=" + pageSize() + ")";
}
}
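
A worked instance of the oversizing rule in grow(long) above (numbers illustrative): resizing requests an extra eighth of minSize, with a floor of 3.

public class GrowSketch {
    public static void main(String[] args) {
        // Mirrors grow(long) above: extra = max(minSize >>> 3, 3)
        for (long minSize : new long[]{10, 100, 1_000_000}) {
            long extra = Math.max(minSize >>> 3, 3);
            System.out.println(minSize + " -> " + (minSize + extra)); // 13, 112, 1125000
        }
    }
}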


@ -0,0 +1,162 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Implements {@link PackedInts.Mutable}, but grows the
* bit count of the underlying packed ints on-demand.
* <p>Beware that this class accepts negative values, but in order
* to store them it grows the number of bits per value to 64.
*
* @lucene.internal
*/
public class XGrowableWriter implements PackedInts.Mutable {
static {
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
}
private long currentMask;
private PackedInts.Mutable current;
private final float acceptableOverheadRatio;
/**
* @param startBitsPerValue the initial number of bits per value, may grow depending on the data
* @param valueCount the number of values
* @param acceptableOverheadRatio an acceptable overhead ratio
*/
public XGrowableWriter(int startBitsPerValue, int valueCount, float acceptableOverheadRatio) {
this.acceptableOverheadRatio = acceptableOverheadRatio;
current = PackedInts.getMutable(valueCount, startBitsPerValue, this.acceptableOverheadRatio);
currentMask = mask(current.getBitsPerValue());
}
private static long mask(int bitsPerValue) {
return bitsPerValue == 64 ? ~0L : PackedInts.maxValue(bitsPerValue);
}
@Override
public long get(int index) {
return current.get(index);
}
@Override
public int size() {
return current.size();
}
@Override
public int getBitsPerValue() {
return current.getBitsPerValue();
}
public PackedInts.Mutable getMutable() {
return current;
}
@Override
public Object getArray() {
return current.getArray();
}
@Override
public boolean hasArray() {
return current.hasArray();
}
private void ensureCapacity(long value) {
if ((value & currentMask) == value) {
return;
}
final int bitsRequired = value < 0 ? 64 : PackedInts.bitsRequired(value);
assert bitsRequired > current.getBitsPerValue();
final int valueCount = size();
PackedInts.Mutable next = PackedInts.getMutable(valueCount, bitsRequired, acceptableOverheadRatio);
PackedInts.copy(current, 0, next, 0, valueCount, PackedInts.DEFAULT_BUFFER_SIZE);
current = next;
currentMask = mask(current.getBitsPerValue());
}
@Override
public void set(int index, long value) {
ensureCapacity(value);
current.set(index, value);
}
@Override
public void clear() {
current.clear();
}
public XGrowableWriter resize(int newSize) {
XGrowableWriter next = new XGrowableWriter(getBitsPerValue(), newSize, acceptableOverheadRatio);
final int limit = Math.min(size(), newSize);
PackedInts.copy(current, 0, next, 0, limit, PackedInts.DEFAULT_BUFFER_SIZE);
return next;
}
@Override
public int get(int index, long[] arr, int off, int len) {
return current.get(index, arr, off, len);
}
@Override
public int set(int index, long[] arr, int off, int len) {
long max = 0;
for (int i = off, end = off + len; i < end; ++i) {
// bitwise or is nice because either all values are positive and the
// or-ed result will require as many bits per value as the max of the
// values, or one of them is negative and the result will be negative,
// forcing GrowableWriter to use 64 bits per value
max |= arr[i];
}
ensureCapacity(max);
return current.set(index, arr, off, len);
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
ensureCapacity(val);
current.fill(fromIndex, toIndex, val);
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ RamUsageEstimator.NUM_BYTES_LONG
+ RamUsageEstimator.NUM_BYTES_FLOAT)
+ current.ramBytesUsed();
}
@Override
public void save(DataOutput out) throws IOException {
current.save(out);
}
}
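
A usage sketch of the on-demand bit growth above, assuming this class and Lucene 4.3's PackedInts are on the classpath:

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.XGrowableWriter;

public class GrowableWriterSketch {
    public static void main(String[] args) {
        XGrowableWriter writer = new XGrowableWriter(1, 100, PackedInts.DEFAULT);
        writer.set(0, 1L);  // fits in the initial 1 bit per value
        writer.set(1, 5L);  // needs 3 bits: the backing storage is rebuilt and copied
        writer.set(2, -1L); // negative value: grows to 64 bits per value
        System.out.println(writer.getBitsPerValue()); // 64
    }
}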


@ -0,0 +1,88 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Version;
import org.apache.lucene.util.packed.PackedInts.Mutable;
import org.apache.lucene.util.packed.PackedInts.Reader;
import org.elasticsearch.common.lucene.Lucene;
/**
* Simplistic compression for array of unsigned long values.
* Each value is >= 0 and <= a specified maximum value. The
* values are stored as packed ints, with each value
* consuming a fixed number of bits.
*
* @lucene.internal
*/
public class XPackedInts {
static {
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
}
/** Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer. */
static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) {
assert buf.length > 0;
int remaining = 0;
while (len > 0) {
final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining));
assert read > 0;
srcPos += read;
len -= read;
remaining += read;
final int written = dest.set(destPos, buf, 0, remaining);
assert written > 0;
destPos += written;
if (written < remaining) {
System.arraycopy(buf, written, buf, 0, remaining - written);
}
remaining -= written;
}
while (remaining > 0) {
final int written = dest.set(destPos, buf, 0, remaining);
destPos += written;
remaining -= written;
System.arraycopy(buf, written, buf, 0, remaining);
}
}
/** Check that the block size is a power of 2, in the right bounds, and return
* its log in base 2. */
static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) {
if (blockSize < minBlockSize || blockSize > maxBlockSize) {
throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize);
}
if ((blockSize & (blockSize - 1)) != 0) {
throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
}
return Integer.numberOfTrailingZeros(blockSize);
}
/** Return the number of blocks required to store <code>size</code> values on
* <code>blockSize</code>. */
static int numBlocks(long size, int blockSize) {
final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1);
if ((long) numBlocks * blockSize < size) {
throw new IllegalArgumentException("size is too large for this block size");
}
return numBlocks;
}
}
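
A worked example of numBlocks above (values illustrative): a ceiling division plus an overflow guard.

public class NumBlocksSketch {
    public static void main(String[] args) {
        long size = 10_000_000_000L; // ten billion values
        int blockSize = 1 << 12;     // 4096, a power of two within the allowed bounds
        int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1);
        System.out.println(numBlocks); // 2441407
    }
}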


@ -0,0 +1,79 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts.Mutable;
/**
* A {@link XPagedGrowableWriter}. This class slices data into fixed-size blocks
* which have independent numbers of bits per value and grow on-demand.
* <p>You should use this class instead of {@link AppendingLongBuffer} only when
* you need random write-access. Otherwise this class will likely be slower and
* less memory-efficient.
* @lucene.internal
*/
public final class XPagedGrowableWriter extends XAbstractPagedMutable<XPagedGrowableWriter> {
static {
// LUCENE MONITOR: this should be in Lucene 4.4 copied from Revision: 1492640.
assert Lucene.VERSION == Version.LUCENE_43 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
}
final float acceptableOverheadRatio;
/**
* Create a new {@link XPagedGrowableWriter} instance.
*
* @param size the number of values to store.
* @param pageSize the number of values per page
* @param startBitsPerValue the initial number of bits per value
* @param acceptableOverheadRatio an acceptable overhead ratio
*/
public XPagedGrowableWriter(long size, int pageSize,
int startBitsPerValue, float acceptableOverheadRatio) {
this(size, pageSize, startBitsPerValue, acceptableOverheadRatio, true);
}
XPagedGrowableWriter(long size, int pageSize, int startBitsPerValue, float acceptableOverheadRatio, boolean fillPages) {
super(startBitsPerValue, size, pageSize);
this.acceptableOverheadRatio = acceptableOverheadRatio;
if (fillPages) {
fillPages();
}
}
@Override
protected Mutable newMutable(int valueCount, int bitsPerValue) {
return new XGrowableWriter(bitsPerValue, valueCount, acceptableOverheadRatio);
}
@Override
protected XPagedGrowableWriter newUnfilledCopy(long newSize) {
return new XPagedGrowableWriter(newSize, pageSize(), bitsPerValue, acceptableOverheadRatio, false);
}
@Override
protected long baseRamBytesUsed() {
return super.baseRamBytesUsed() + RamUsageEstimator.NUM_BYTES_FLOAT;
}
}
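
A usage sketch of the class above (assuming the X-classes from this commit are on the classpath): the long index makes more than 2B slots addressable while each page grows its bit count independently.

import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.XPagedGrowableWriter;

public class PagedGrowableWriterSketch {
    public static void main(String[] args) {
        XPagedGrowableWriter writer =
                new XPagedGrowableWriter(3_000_000_000L, 1 << 12, 1, PackedInts.DEFAULT);
        writer.set(2_500_000_000L, 42L);                // only the touched page grows its bits
        System.out.println(writer.get(2_500_000_000L)); // 42
        writer = writer.grow(4_000_000_000L);           // copy-resize when more room is needed
    }
}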


@ -0,0 +1,68 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.util;
import com.google.common.base.Preconditions;
/** Common implementation for array lists that slice data into fixed-size blocks. */
abstract class AbstractBigArray {
private final int pageShift;
private final int pageMask;
protected long size;
protected AbstractBigArray(int pageSize) {
Preconditions.checkArgument(pageSize >= 128, "pageSize must be >= 128");
Preconditions.checkArgument((pageSize & (pageSize - 1)) == 0, "pageSize must be a power of two");
this.pageShift = Integer.numberOfTrailingZeros(pageSize);
this.pageMask = pageSize - 1;
size = 0;
}
final int numPages(long capacity) {
final long numPages = (capacity + pageMask) >>> pageShift;
Preconditions.checkArgument(numPages <= Integer.MAX_VALUE, "pageSize=" + (pageMask + 1) + " is too small for such a capacity: " + capacity);
return (int) numPages;
}
final int pageSize() {
return pageMask + 1;
}
final int pageIndex(long index) {
return (int) (index >>> pageShift);
}
final int indexInPage(long index) {
return (int) (index & pageMask);
}
public final long size() {
return size;
}
protected abstract int numBytesPerElement();
public final long sizeInBytes() {
// rough approximate, we only take into account the size of the values, not the overhead of the array objects
return ((long) pageIndex(size - 1) + 1) * pageSize() * numBytesPerElement();
}
}


@ -0,0 +1,75 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.util;
import org.apache.lucene.util.ArrayUtil;
import org.elasticsearch.common.RamUsage;
import java.util.Arrays;
/** Double array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of
* configurable length. */
public final class BigDoubleArrayList extends AbstractBigArray {
/** Default page size, 16KB of memory per page. */
private static final int DEFAULT_PAGE_SIZE = 1 << 11;
private double[][] pages;
public BigDoubleArrayList(int pageSize, long initialCapacity) {
super(pageSize);
pages = new double[numPages(initialCapacity)][];
}
public BigDoubleArrayList(long initialCapacity) {
this(DEFAULT_PAGE_SIZE, initialCapacity);
}
public BigDoubleArrayList() {
this(1024);
}
public double get(long index) {
assert index >= 0 && index < size;
final int pageIndex = pageIndex(index);
final int indexInPage = indexInPage(index);
return pages[pageIndex][indexInPage];
}
public void add(double d) {
final int pageIndex = pageIndex(size);
if (pageIndex >= pages.length) {
final int newLength = ArrayUtil.oversize(pageIndex + 1, numBytesPerElement());
pages = Arrays.copyOf(pages, newLength);
}
if (pages[pageIndex] == null) {
pages[pageIndex] = new double[pageSize()];
}
final int indexInPage = indexInPage(size);
pages[pageIndex][indexInPage] = d;
++size;
}
@Override
protected int numBytesPerElement() {
return RamUsage.NUM_BYTES_DOUBLE;
}
}
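
A usage sketch of the list above; pages beyond the initial capacity are allocated only when add() reaches them.

import org.elasticsearch.common.util.BigDoubleArrayList;

public class BigDoubleArrayListSketch {
    public static void main(String[] args) {
        BigDoubleArrayList list = new BigDoubleArrayList();
        for (int i = 0; i < 10_000; ++i) {
            list.add(i / 3.0); // grows the page array on demand
        }
        System.out.println(list.get(9_999L)); // 3333.0
    }
}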


@ -0,0 +1,70 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.util;
import org.apache.lucene.util.ArrayUtil;
import org.elasticsearch.common.RamUsage;
/** Float array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of
* configurable length. */
public final class BigFloatArrayList extends AbstractBigArray {
/** Default page size, 16KB of memory per page. */
private static final int DEFAULT_PAGE_SIZE = 1 << 12;
private float[][] pages;
public BigFloatArrayList(int pageSize, long initialCapacity) {
super(pageSize);
pages = new float[numPages(initialCapacity)][];
}
public BigFloatArrayList(long initialCapacity) {
this(DEFAULT_PAGE_SIZE, initialCapacity);
}
public BigFloatArrayList() {
this(1024);
}
public float get(long index) {
assert index >= 0 && index < size;
final int pageIndex = pageIndex(index);
final int indexInPage = indexInPage(index);
return pages[pageIndex][indexInPage];
}
public void add(float f) {
final int pageIndex = pageIndex(size);
pages = ArrayUtil.grow(pages, pageIndex + 1);
if (pages[pageIndex] == null) {
pages[pageIndex] = new float[pageSize()];
}
final int indexInPage = indexInPage(size);
pages[pageIndex][indexInPage] = f;
++size;
}
@Override
protected int numBytesPerElement() {
return RamUsage.NUM_BYTES_FLOAT;
}
}


@ -0,0 +1,69 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.util;
import org.elasticsearch.common.RamUsage;
/** Int array abstraction able to support more than 2B values. This implementation slices data into fixed-sized blocks of
* configurable length. */
public final class BigIntArray extends AbstractBigArray implements IntArray {
/** Default page size, 16KB of memory per page. */
public static final int DEFAULT_PAGE_SIZE = 1 << 12;
private int[][] pages;
public BigIntArray(int pageSize, long size) {
super(pageSize);
this.size = size;
pages = new int[numPages(size)][];
for (int i = 0; i < pages.length; ++i) {
pages[i] = new int[pageSize()];
}
}
public BigIntArray(long size) {
this(DEFAULT_PAGE_SIZE, size);
}
public int get(long index) {
final int pageIndex = pageIndex(index);
final int indexInPage = indexInPage(index);
return pages[pageIndex][indexInPage];
}
public void set(long index, int value) {
final int pageIndex = pageIndex(index);
final int indexInPage = indexInPage(index);
pages[pageIndex][indexInPage] = value;
}
public int increment(long index, int inc) {
final int pageIndex = pageIndex(index);
final int indexInPage = indexInPage(index);
return pages[pageIndex][indexInPage] += inc;
}
@Override
protected int numBytesPerElement() {
return RamUsage.NUM_BYTES_INT;
}
}


@ -0,0 +1,34 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.util;
/** Abstraction of an array of integer values. */
public interface IntArray {
/** Get an element given its index. */
public abstract int get(long index);
/** Set a value at the given index. */
public abstract void set(long index, int value);
/** Increment value at the given index by <code>inc</code> and return the value. */
public abstract int increment(long index, int inc);
}


@ -0,0 +1,66 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.util;
/** Utility methods to work with {@link IntArray}s. */
public class IntArrays {
private IntArrays() {}
/** Return an {@link IntArray} view over the provided array. */
public static IntArray wrap(final int[] array) {
return new IntArray() {
private void checkIndex(long index) {
if (index > Integer.MAX_VALUE) {
throw new IndexOutOfBoundsException(Long.toString(index));
}
}
@Override
public void set(long index, int value) {
checkIndex(index);
array[(int) index] = value;
}
@Override
public int increment(long index, int inc) {
checkIndex(index);
return array[(int) index] += inc;
}
@Override
public int get(long index) {
checkIndex(index);
return array[(int) index];
}
};
}
/** Return a newly allocated {@link IntArray} of the given length or more. */
public static IntArray allocate(long length) {
if (length <= BigIntArray.DEFAULT_PAGE_SIZE) {
return wrap(new int[(int) length]);
} else {
return new BigIntArray(length);
}
}
}
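
A usage sketch of the helper above: allocate() wraps a plain int[] while a single page suffices and switches to the paged BigIntArray beyond that.

import org.elasticsearch.common.util.IntArray;
import org.elasticsearch.common.util.IntArrays;

public class IntArraysSketch {
    public static void main(String[] args) {
        IntArray small = IntArrays.allocate(100L);     // backed by new int[100]
        small.set(42L, 7);
        IntArray big = IntArrays.allocate(100_000L);   // larger than one page -> BigIntArray
        big.set(99_999L, 7);
        System.out.println(big.increment(99_999L, 1)); // 8
    }
}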


@ -120,10 +120,10 @@ public abstract class BytesValues {
public static class Single implements Iter {
protected BytesRef value;
protected int ord;
protected long ord;
protected boolean done;
public Single reset(BytesRef value, int ord) {
public Single reset(BytesRef value, long ord) {
this.value = value;
this.ord = ord;
this.done = false;
@ -149,8 +149,8 @@ public abstract class BytesValues {
static class Multi implements Iter {
protected int innerOrd;
protected int ord;
protected long innerOrd;
protected long ord;
protected BytesValues.WithOrdinals withOrds;
protected Ordinals.Docs.Iter ordsIter;
protected final BytesRef scratch = new BytesRef();
@ -226,7 +226,7 @@ public abstract class BytesValues {
return ordinals;
}
public BytesRef getValueByOrd(int ord) {
public BytesRef getValueByOrd(long ord) {
return getValueScratchByOrd(ord, scratch);
}
@ -247,7 +247,7 @@ public abstract class BytesValues {
@Override
public BytesRef getValue(int docId) {
final int ord = ordinals.getOrd(docId);
final long ord = ordinals.getOrd(docId);
if (ord == 0) {
return null;
}
@ -268,7 +268,7 @@ public abstract class BytesValues {
* result which will also be returned. If there is no value for this docId, the length will be 0.
* Note, the bytes are not "safe".
*/
public abstract BytesRef getValueScratchByOrd(int ord, BytesRef ret);
public abstract BytesRef getValueScratchByOrd(long ord, BytesRef ret);
public static class Empty extends WithOrdinals {
@ -277,7 +277,7 @@ public abstract class BytesValues {
}
@Override
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
public BytesRef getValueScratchByOrd(long ord, BytesRef ret) {
ret.length = 0;
return ret;
}


@ -120,7 +120,7 @@ public abstract class DoubleValues {
@Override
public final double getValueMissing(int docId, double missingValue) {
final int ord = ordinals.getOrd(docId);
final long ord = ordinals.getOrd(docId);
if (ord == 0) {
return missingValue;
} else {
@ -128,7 +128,7 @@ public abstract class DoubleValues {
}
}
public abstract double getValueByOrd(int ord);
public abstract double getValueByOrd(long ord);
@Override
public final Iter getIter(int docId) {
@ -184,8 +184,8 @@ public abstract class DoubleValues {
static class Multi implements Iter {
private org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter ordsIter;
private int ord;
private Ordinals.Docs.Iter ordsIter;
private long ord;
private WithOrdinals values;
public Multi(WithOrdinals values) {


@ -118,7 +118,7 @@ public abstract class LongValues {
return getValueByOrd(ordinals.getOrd(docId));
}
public abstract long getValueByOrd(int ord);
public abstract long getValueByOrd(long ord);
@Override
public final Iter getIter(int docId) {
@ -127,7 +127,7 @@ public abstract class LongValues {
@Override
public final long getValueMissing(int docId, long missingValue) {
final int ord = ordinals.getOrd(docId);
final long ord = ordinals.getOrd(docId);
if (ord == 0) {
return missingValue;
} else {
@ -185,7 +185,7 @@ public abstract class LongValues {
static class Multi implements Iter {
private org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter ordsIter;
private int ord;
private long ord;
private WithOrdinals values;
public Multi(WithOrdinals values) {


@ -45,7 +45,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
/* Ords for each slot.
@lucene.internal */
final int[] ords;
final long[] ords;
final SortMode sortMode;
@ -75,7 +75,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
/* Bottom ord (same as ords[bottomSlot] once bottomSlot
is set). Cached for faster compares.
@lucene.internal */
int bottomOrd;
long bottomOrd;
/* True if current bottom slot matches the current
reader.
@ -92,7 +92,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
public BytesRefOrdValComparator(IndexFieldData.WithOrdinals<?> indexFieldData, int numHits, SortMode sortMode) {
this.indexFieldData = indexFieldData;
this.sortMode = sortMode;
ords = new int[numHits];
ords = new long[numHits];
values = new BytesRef[numHits];
readerGen = new int[numHits];
}
@ -100,7 +100,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
@Override
public int compare(int slot1, int slot2) {
if (readerGen[slot1] == readerGen[slot2]) {
return ords[slot1] - ords[slot2];
return LongValuesComparator.compare(ords[slot1], ords[slot2]);
}
final BytesRef val1 = values[slot1];
@ -207,7 +207,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
final int docOrd = (readerOrds[doc] & 0xFF);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - docOrd;
return (int) bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
@ -253,7 +253,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
final int docOrd = (readerOrds[doc] & 0xFFFF);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - docOrd;
return (int) bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
@ -299,7 +299,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
final int docOrd = readerOrds[doc];
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - docOrd;
return (int) bottomOrd - docOrd;
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
@ -345,10 +345,10 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
@Override
public int compareBottom(int doc) {
assert bottomSlot != -1;
final int docOrd = readerOrds.getOrd(doc);
final long docOrd = readerOrds.getOrd(doc);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - docOrd;
return LongValuesComparator.compare(bottomOrd, docOrd);
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
@ -361,7 +361,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
@Override
public void copy(int slot, int doc) {
final int ord = readerOrds.getOrd(doc);
final long ord = readerOrds.getOrd(doc);
ords[slot] = ord;
if (ord == 0) {
values[slot] = null;
@ -428,7 +428,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
bottomSameReader = true;
readerGen[bottomSlot] = currentReaderGen;
} else {
final int index = binarySearch(termsIndex, bottomValue);
final long index = binarySearch(termsIndex, bottomValue);
if (index < 0) {
bottomOrd = -index - 2;
bottomSameReader = false;
@ -448,15 +448,15 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
return values[slot];
}
final protected static int binarySearch(BytesValues.WithOrdinals a, BytesRef key) {
final protected static long binarySearch(BytesValues.WithOrdinals a, BytesRef key) {
return binarySearch(a, key, 1, a.ordinals().getNumOrds());
}
final protected static int binarySearch(BytesValues.WithOrdinals a, BytesRef key, int low, int high) {
final protected static long binarySearch(BytesValues.WithOrdinals a, BytesRef key, long low, long high) {
assert a.getValueByOrd(high) == null | a.getValueByOrd(high) != null; // make sure we actually can get these values
assert a.getValueByOrd(low) == null | a.getValueByOrd(low) != null;
while (low <= high) {
int mid = (low + high) >>> 1;
long mid = (low + high) >>> 1;
BytesRef midVal = a.getValueByOrd(mid);
int cmp;
if (midVal != null) {
@ -488,10 +488,10 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
@Override
public int compareBottom(int doc) throws IOException {
final int docOrd = getRelevantOrd(readerOrds, doc, sortMode);
final long docOrd = getRelevantOrd(readerOrds, doc, sortMode);
if (bottomSameReader) {
// ord is precisely comparable, even in the equal case
return bottomOrd - docOrd;
return LongValuesComparator.compare(bottomOrd, docOrd);
} else if (bottomOrd >= docOrd) {
// the equals case always means bottom is > doc
// (because we set bottomOrd to the lower bound in
@ -504,7 +504,7 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
@Override
public void copy(int slot, int doc) throws IOException {
final int ord = getRelevantOrd(readerOrds, doc, sortMode);
final long ord = getRelevantOrd(readerOrds, doc, sortMode);
ords[slot] = ord;
if (ord == 0) {
values[slot] = null;
@ -561,14 +561,14 @@ public final class BytesRefOrdValComparator extends FieldComparator<BytesRef> {
return relevantVal;
}
static int getRelevantOrd(Ordinals.Docs readerOrds, int docId, SortMode sortMode) {
static long getRelevantOrd(Ordinals.Docs readerOrds, int docId, SortMode sortMode) {
Ordinals.Docs.Iter iter = readerOrds.getIter(docId);
int currentVal = iter.next();
long currentVal = iter.next();
if (currentVal == 0) {
return 0;
}
int relevantVal = currentVal;
long relevantVal = currentVal;
while (true) {
if (sortMode == SortMode.MAX) {
if (currentVal > relevantVal) {


@ -19,7 +19,7 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LongsRef;
import org.elasticsearch.common.RamUsage;
/**
@ -64,13 +64,13 @@ public class DocIdOrdinals implements Ordinals {
}
@Override
public int getNumOrds() {
public long getNumOrds() {
return numDocs;
}
@Override
public int getMaxOrd() {
return numDocs + 1;
public long getMaxOrd() {
return 1L + numDocs;
}
@Override
@ -81,7 +81,7 @@ public class DocIdOrdinals implements Ordinals {
public static class Docs implements Ordinals.Docs {
private final DocIdOrdinals parent;
private final IntsRef intsScratch = new IntsRef(new int[1], 0, 1);
private final LongsRef longsScratch = new LongsRef(new long[1], 0, 1);
private final SingleValueIter iter = new SingleValueIter();
public Docs(DocIdOrdinals parent) {
@ -99,12 +99,12 @@ public class DocIdOrdinals implements Ordinals {
}
@Override
public int getNumOrds() {
public long getNumOrds() {
return parent.getNumOrds();
}
@Override
public int getMaxOrd() {
public long getMaxOrd() {
return parent.getMaxOrd();
}
@ -114,14 +114,14 @@ public class DocIdOrdinals implements Ordinals {
}
@Override
public int getOrd(int docId) {
public long getOrd(int docId) {
return docId + 1;
}
@Override
public IntsRef getOrds(int docId) {
intsScratch.ints[0] = docId + 1;
return intsScratch;
public LongsRef getOrds(int docId) {
longsScratch.longs[0] = docId + 1;
return longsScratch;
}
@Override


@ -19,7 +19,7 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LongsRef;
/**
*/
@ -57,12 +57,12 @@ public class EmptyOrdinals implements Ordinals {
}
@Override
public int getNumOrds() {
public long getNumOrds() {
return 0;
}
@Override
public int getMaxOrd() {
public long getMaxOrd() {
return 1;
}
@ -74,7 +74,7 @@ public class EmptyOrdinals implements Ordinals {
public static class Docs implements Ordinals.Docs {
private final EmptyOrdinals parent;
public static final IntsRef EMPTY_INTS_REF = new IntsRef();
public static final LongsRef EMPTY_LONGS_REF = new LongsRef();
public Docs(EmptyOrdinals parent) {
this.parent = parent;
@ -91,12 +91,12 @@ public class EmptyOrdinals implements Ordinals {
}
@Override
public int getNumOrds() {
public long getNumOrds() {
return 0;
}
@Override
public int getMaxOrd() {
public long getMaxOrd() {
return 1;
}
@ -106,13 +106,13 @@ public class EmptyOrdinals implements Ordinals {
}
@Override
public int getOrd(int docId) {
public long getOrd(int docId) {
return 0;
}
@Override
public IntsRef getOrds(int docId) {
return EMPTY_INTS_REF;
public LongsRef getOrds(int docId) {
return EMPTY_LONGS_REF;
}
@Override


@ -1,189 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.util.IntsRef;
import org.elasticsearch.common.RamUsage;
/**
* "Flat" multi valued ordinals, the first level array size is as the maximum
* values a docId has. Ordinals are populated in order from the first flat array
* value to the next.
*/
public final class MultiFlatArrayOrdinals implements Ordinals {
// ordinals with value 0 indicates no value
private final int[][] ordinals;
private final int numDocs;
private final int numOrds;
private final int maxOrd;
private long size = -1;
public MultiFlatArrayOrdinals(int[][] ordinals, int numOrds) {
assert ordinals.length > 0;
this.ordinals = ordinals;
this.numDocs = ordinals[0].length;
this.numOrds = numOrds;
this.maxOrd = numOrds + 1;
}
@Override
public boolean hasSingleArrayBackingStorage() {
return false;
}
@Override
public Object getBackingStorage() {
return ordinals;
}
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
long size = 0;
size += RamUsage.NUM_BYTES_ARRAY_HEADER; // for the top level array
for (int[] ordinal : ordinals) {
size += RamUsage.NUM_BYTES_INT * ordinal.length + RamUsage.NUM_BYTES_ARRAY_HEADER;
}
this.size = size;
}
return size;
}
@Override
public boolean isMultiValued() {
return true;
}
@Override
public int getNumDocs() {
return numDocs;
}
@Override
public int getNumOrds() {
return numOrds;
}
@Override
public int getMaxOrd() {
return this.maxOrd;
}
@Override
public Docs ordinals() {
return new Docs(this, ordinals);
}
public static class Docs implements Ordinals.Docs {
private final MultiFlatArrayOrdinals parent;
private final int[][] ordinals;
private final IterImpl iter;
private final IntsRef intsScratch;
public Docs(MultiFlatArrayOrdinals parent, int[][] ordinals) {
this.parent = parent;
this.ordinals = ordinals;
this.iter = new IterImpl(ordinals);
this.intsScratch = new IntsRef(new int[16], 0 , 16);
}
@Override
public Ordinals ordinals() {
return this.parent;
}
@Override
public int getNumDocs() {
return parent.getNumDocs();
}
@Override
public int getNumOrds() {
return parent.getNumOrds();
}
@Override
public int getMaxOrd() {
return parent.getMaxOrd();
}
@Override
public boolean isMultiValued() {
return true;
}
@Override
public int getOrd(int docId) {
return ordinals[0][docId];
}
@Override
public IntsRef getOrds(int docId) {
intsScratch.offset = 0;
int i;
for (i = 0; i < ordinals.length; i++) {
int ordinal = ordinals[i][docId];
if (ordinal == 0) {
if (i == 0) {
intsScratch.length = 0;
return intsScratch;
}
break;
}
intsScratch.grow(i+1);
intsScratch.ints[i] = ordinal;
}
intsScratch.length = i;
return intsScratch;
}
@Override
public Iter getIter(int docId) {
return iter.reset(docId);
}
public static class IterImpl implements Docs.Iter {
private final int[][] ordinals;
private int docId;
private int i;
public IterImpl(int[][] ordinals) {
this.ordinals = ordinals;
}
public IterImpl reset(int docId) {
this.docId = docId;
this.i = 0;
return this;
}
@Override
public int next() {
if (i >= ordinals.length) return 0;
return ordinals[i++][docId];
}
}
}
}


@ -0,0 +1,219 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.AppendingLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter;
/** {@link Ordinals} implementation which is efficient at storing field data ordinals for multi-valued or sparse fields. */
public class MultiOrdinals implements Ordinals {
// hard-coded in Lucene 4.3 but will be exposed in Lucene 4.4
static {
assert Lucene.VERSION == Version.LUCENE_43;
}
private static final int OFFSETS_PAGE_SIZE = 1024;
/** Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%. */
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds) {
final int bitsPerOrd = PackedInts.bitsRequired(numOrds);
// Compute the worst-case number of bits per value for offsets, eg. if no docs have a value at the
// beginning of the block and all docs have one at the end of the block
final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
final int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd;
return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes;
}
private final boolean multiValued;
private final long numOrds;
private final MonotonicAppendingLongBuffer endOffsets;
private final AppendingLongBuffer ords;
public MultiOrdinals(OrdinalsBuilder builder) {
multiValued = builder.getNumMultiValuesDocs() > 0;
numOrds = builder.getNumOrds();
endOffsets = new MonotonicAppendingLongBuffer();
ords = new AppendingLongBuffer();
long lastEndOffset = 0;
for (int i = 0; i < builder.maxDoc(); ++i) {
final LongsRef docOrds = builder.docOrds(i);
final long endOffset = lastEndOffset + docOrds.length;
endOffsets.add(endOffset);
for (int j = 0; j < docOrds.length; ++j) {
ords.add(docOrds.longs[docOrds.offset + j] - 1);
}
lastEndOffset = endOffset;
}
assert endOffsets.size() == builder.maxDoc();
assert ords.size() == builder.getTotalNumOrds() : ords.size() + " != " + builder.getTotalNumOrds();
}
@Override
public boolean hasSingleArrayBackingStorage() {
return false;
}
@Override
public Object getBackingStorage() {
return null;
}
@Override
public long getMemorySizeInBytes() {
return endOffsets.ramBytesUsed() + ords.ramBytesUsed();
}
@Override
public boolean isMultiValued() {
return multiValued;
}
@Override
public int getNumDocs() {
return (int) endOffsets.size();
}
@Override
public long getNumOrds() {
return numOrds;
}
@Override
public long getMaxOrd() {
return numOrds + 1;
}
@Override
public Ordinals.Docs ordinals() {
return new MultiDocs(this);
}
static class MultiDocs implements Ordinals.Docs {
private final MultiOrdinals ordinals;
private final MonotonicAppendingLongBuffer endOffsets;
private final AppendingLongBuffer ords;
private final LongsRef longsScratch;
private final MultiIter iter;
MultiDocs(MultiOrdinals ordinals) {
this.ordinals = ordinals;
this.endOffsets = ordinals.endOffsets;
this.ords = ordinals.ords;
this.longsScratch = new LongsRef(16);
this.iter = new MultiIter(ords);
}
@Override
public Ordinals ordinals() {
return this.ordinals;
}
@Override
public int getNumDocs() {
return ordinals.getNumDocs();
}
@Override
public long getNumOrds() {
return ordinals.getNumOrds();
}
@Override
public long getMaxOrd() {
return ordinals.getMaxOrd();
}
@Override
public boolean isMultiValued() {
return ordinals.isMultiValued();
}
@Override
public long getOrd(int docId) {
final long startOffset = docId > 0 ? endOffsets.get(docId - 1) : 0;
final long endOffset = endOffsets.get(docId);
if (startOffset == endOffset) {
return 0L; // ord for missing values
} else {
return 1L + ords.get(startOffset);
}
}
@Override
public LongsRef getOrds(int docId) {
final long startOffset = docId > 0 ? endOffsets.get(docId - 1) : 0;
final long endOffset = endOffsets.get(docId);
final int numValues = (int) (endOffset - startOffset);
if (longsScratch.length < numValues) {
longsScratch.longs = new long[ArrayUtil.oversize(numValues, RamUsage.NUM_BYTES_LONG)];
}
for (int i = 0; i < numValues; ++i) {
longsScratch.longs[i] = 1L + ords.get(startOffset + i);
}
longsScratch.offset = 0;
longsScratch.length = numValues;
return longsScratch;
}
@Override
public Iter getIter(int docId) {
final long startOffset = docId > 0 ? endOffsets.get(docId - 1) : 0;
final long endOffset = endOffsets.get(docId);
iter.offset = startOffset;
iter.endOffset = endOffset;
return iter;
}
}
static class MultiIter implements Iter {
final AppendingLongBuffer ordinals;
long offset, endOffset;
MultiIter(AppendingLongBuffer ordinals) {
this.ordinals = ordinals;
}
@Override
public long next() {
if (offset >= endOffset) {
return 0L;
} else {
return 1L + ordinals.get(offset++);
}
}
}
}
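
A worked example of the windowing logic in MultiDocs above (arrays illustrative): endOffsets stores the cumulative value count per document, so doc N owns the ords window [endOffsets[N-1], endOffsets[N]), and stored ordinals are re-based from 0-based to 1-based on read since 0 is reserved for missing values.

public class MultiOrdinalsDecodeSketch {
    public static void main(String[] args) {
        long[] endOffsets = {2, 2, 5}; // doc 0 -> [0,2), doc 1 -> [2,2) i.e. no value, doc 2 -> [2,5)
        long[] ords = {0, 3, 1, 2, 4}; // 0-based ordinals, as stored
        int docId = 2;
        long start = docId > 0 ? endOffsets[docId - 1] : 0;
        long end = endOffsets[docId];
        for (long i = start; i < end; ++i) {
            System.out.println(1L + ords[(int) i]); // prints 2, 3, 5
        }
    }
}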


@ -19,7 +19,7 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LongsRef;
/**
* A thread safe ordinals abstraction. Ordinals can only be positive integers.
@ -54,13 +54,13 @@ public interface Ordinals {
/**
* The number of ordinals, excluding the "0" ordinal indicating a missing value.
*/
int getNumOrds();
long getNumOrds();
/**
* Returns total unique ord count; this includes +1 for
* the null ord (always 0).
*/
int getMaxOrd();
long getMaxOrd();
/**
* Returns a lightweight (non thread safe) view iterator of the ordinals.
@ -88,13 +88,13 @@ public interface Ordinals {
/**
* The number of ordinals, excluding the "0" ordinal (indicating a missing value).
*/
int getNumOrds();
long getNumOrds();
/**
* Returns total unique ord count; this includes +1 for
* the null ord (always 0).
*/
int getMaxOrd();
long getMaxOrd();
/**
* Is one of the docs maps to more than one ordinal?
@ -105,13 +105,13 @@ public interface Ordinals {
* The ordinal that maps to the relevant docId. If it has no value, returns
* <tt>0</tt>.
*/
int getOrd(int docId);
long getOrd(int docId);
/**
* Returns an array of ordinals matching the docIds, with 0 length one
* for a doc with no ordinals.
*/
IntsRef getOrds(int docId);
LongsRef getOrds(int docId);
/**
* Returns an iterator of the ordinals that match the docId, with an
@ -128,7 +128,7 @@ public interface Ordinals {
/**
* Gets the next ordinal. Returning 0 if the iteration is exhausted.
*/
int next();
long next();
}
static class EmptyIter implements Iter {
@ -136,23 +136,23 @@ public interface Ordinals {
public static EmptyIter INSTANCE = new EmptyIter();
@Override
public int next() {
public long next() {
return 0;
}
}
static class SingleValueIter implements Iter {
private int value;
private long value;
public SingleValueIter reset(int value) {
public SingleValueIter reset(long value) {
this.value = value;
return this;
}
@Override
public int next() {
int actual = value;
public long next() {
long actual = value;
value = 0;
return actual;
}


@ -1,4 +1,3 @@
package org.elasticsearch.index.fielddata.ordinals;
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
@ -17,21 +16,21 @@ package org.elasticsearch.index.fielddata.ordinals;
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.*;
import org.apache.lucene.util.IntBlockPool.Allocator;
import org.apache.lucene.util.IntBlockPool.DirectAllocator;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.apache.lucene.util.packed.XPagedGrowableWriter;
import org.elasticsearch.common.settings.Settings;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
/**
@ -41,54 +40,251 @@ import java.util.Comparator;
*/
public final class OrdinalsBuilder implements Closeable {
private final int maxDoc;
private int[] mvOrds;
private GrowableWriter svOrds;
/** Default acceptable overhead ratio. {@link OrdinalsBuilder} memory usage is mostly transient so it is likely a better trade-off to
* trade memory for speed in order to resize less often. */
public static final float DEFAULT_ACCEPTABLE_OVERHEAD_RATIO = PackedInts.FAST;
private int[] offsets;
private final IntBlockPool pool;
private final IntBlockPool.SliceWriter writer;
private final IntsRef intsRef = new IntsRef(1);
private final IntBlockPool.SliceReader reader;
private int currentOrd = 0;
/** The following structure is used to store ordinals. The idea is to store ords on levels of increasing sizes. Level 0 stores
* 1 value and 1 pointer to level 1. Level 1 stores 2 values and 1 pointer to level 2, ..., Level n stores 2**n values and
* 1 pointer to level n+1. If at some point an ordinal or a pointer has 0 as a value, this means that there are no remaining
* values. On the first level, ordinals.get(docId) is the first ordinal for docId or 0 if the document has no ordinals. On
* subsequent levels, the first 2^level slots are reserved and all have 0 as a value.
* <pre>
* Example for an index of 3 docs (O=ordinal, P = pointer)
* Level 0:
* ordinals [1] [4] [2]
* nextLevelSlices 2 0 1
* Level 1:
* ordinals [0 0] [2 0] [3 4]
* nextLevelSlices 0 0 1
* Level 2:
* ordinals [0 0 0 0] [5 0 0 0]
* nextLevelSlices 0 0
* </pre>
* On level 0, all documents have an ordinal: 0 has 1, 1 has 4 and 2 has 2 as a first ordinal; this means that we need to read
* nextLevelSlices to get the index of their ordinals on the next level. The entry for document 1 is 0, meaning that we have
* already read all its ordinals. On the contrary 0 and 2 have more ordinals which are stored at indices 2 and 1. Let's continue
* with document 2: it has 2 more ordinals on level 1: 3 and 4 and its next level index is 1 meaning that there are remaining
* ordinals on the next level. On level 2 at index 1, we can read [5 0 0 0] meaning that 5 is an ordinal as well, but the
* fact that it is followed by zeros means that there are no more ordinals. In the end, document 2 has 2, 3, 4 and 5 as ordinals.
*
* In addition to these structures, there is another array which stores the current position (level + slice + offset in the slice)
* in order to be able to append data in constant time.
*/
private static class OrdinalsStore {
private static final int PAGE_SIZE = 1 << 12;
/** Number of slots at <code>level</code> */
private static int numSlots(int level) {
return 1 << level;
}
private static int slotsMask(int level) {
return numSlots(level) - 1;
}
/** Encode the position for the given level and offset. The idea is to encode the level using unary coding in the lower bits and
* then the offset in the higher bits. */
private static long position(int level, long offset) {
assert level >= 1;
return (1 << (level - 1)) | (offset << level);
}
/** Decode the level from an encoded position. */
private static int level(long position) {
return 1 + Long.numberOfTrailingZeros(position);
}
/** Decode the offset from the position. */
private static long offset(long position, int level) {
return position >>> level;
}
/** Get the ID of the slice given an offset. */
private static long sliceID(int level, long offset) {
return offset >>> level;
}
/** Compute the first offset of the given slice. */
private static long startOffset(int level, long slice) {
return slice << level;
}
/** Compute the number of ordinals stored for a value given its current position. */
private static int numOrdinals(int level, long offset) {
return (1 << level) + (int) (offset & slotsMask(level));
}
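To make the unary position encoding concrete, here is a small self-contained round-trip check (an illustrative sketch, not part of the commit; it copies the three helpers above):

// Illustrative sketch: round-trips the unary position encoding used above.
public class PositionEncodingDemo {
    static long position(int level, long offset) {
        return (1L << (level - 1)) | (offset << level);
    }
    static int level(long position) {
        return 1 + Long.numberOfTrailingZeros(position);
    }
    static long offset(long position, int level) {
        return position >>> level;
    }
    public static void main(String[] args) {
        for (int level = 1; level <= 4; ++level) {
            for (long off = 1; off <= 10; ++off) {
                final long pos = position(level, off);
                if (level(pos) != level || offset(pos, level) != off) {
                    throw new AssertionError("broken encoding");
                }
            }
        }
        System.out.println("position encoding round-trips");
    }
}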
// Current position
private XPagedGrowableWriter positions;
// First level (0) of ordinals and pointers to the next level
private final GrowableWriter firstOrdinals;
private XPagedGrowableWriter firstNextLevelSlices;
// Ordinals and pointers for other levels, starting at 1
private final XPagedGrowableWriter[] ordinals;
private final XPagedGrowableWriter[] nextLevelSlices;
private final int[] sizes;
private final int startBitsPerValue;
private final float acceptableOverheadRatio;
OrdinalsStore(int maxDoc, int startBitsPerValue, float acceptableOverheadRatio) {
this.startBitsPerValue = startBitsPerValue;
this.acceptableOverheadRatio = acceptableOverheadRatio;
positions = new XPagedGrowableWriter(maxDoc, PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
firstOrdinals = new GrowableWriter(startBitsPerValue, maxDoc, acceptableOverheadRatio);
// over-allocate in order to never worry about the array sizes; 24 entries allow storing several million ordinals per doc...
ordinals = new XPagedGrowableWriter[24];
nextLevelSlices = new XPagedGrowableWriter[24];
sizes = new int[24];
Arrays.fill(sizes, 1); // reserve the 1st slice on every level
}
/** Allocate a new slice and return its ID. */
private long newSlice(int level) {
final long newSlice = sizes[level]++;
// Lazily allocate ordinals
if (ordinals[level] == null) {
ordinals[level] = new XPagedGrowableWriter(8L * numSlots(level), PAGE_SIZE, startBitsPerValue, acceptableOverheadRatio);
} else {
ordinals[level] = ordinals[level].grow(sizes[level] * numSlots(level));
if (nextLevelSlices[level] != null) {
nextLevelSlices[level] = nextLevelSlices[level].grow(sizes[level]);
}
}
return newSlice;
}
public int addOrdinal(int docID, long ordinal) {
final long position = positions.get(docID);
if (position == 0L) { // on the first level
// 0 or 1 ordinal
if (firstOrdinals.get(docID) == 0L) {
firstOrdinals.set(docID, ordinal);
return 1;
} else {
final long newSlice = newSlice(1);
if (firstNextLevelSlices == null) {
firstNextLevelSlices = new XPagedGrowableWriter(firstOrdinals.size(), PAGE_SIZE, 3, acceptableOverheadRatio);
}
firstNextLevelSlices.set(docID, newSlice);
final long offset = startOffset(1, newSlice);
ordinals[1].set(offset, ordinal);
positions.set(docID, position(1, offset)); // current position is on the 1st level and not allocated yet
return 2;
}
} else {
int level = level(position);
long offset = offset(position, level);
assert offset != 0L;
if (((offset + 1) & slotsMask(level)) == 0L) {
// reached the end of the slice, allocate a new one on the next level
final long newSlice = newSlice(level + 1);
if (nextLevelSlices[level] == null) {
nextLevelSlices[level] = new XPagedGrowableWriter(sizes[level], PAGE_SIZE, 1, acceptableOverheadRatio);
}
nextLevelSlices[level].set(sliceID(level, offset), newSlice);
++level;
offset = startOffset(level, newSlice);
assert (offset & slotsMask(level)) == 0L;
} else {
// just go to the next slot
++offset;
}
ordinals[level].set(offset, ordinal);
final long newPosition = position(level, offset);
positions.set(docID, newPosition);
return numOrdinals(level, offset);
}
}
public void appendOrdinals(int docID, LongsRef ords) {
// First level
final long firstOrd = firstOrdinals.get(docID);
if (firstOrd == 0L) {
return;
}
ords.longs = ArrayUtil.grow(ords.longs, ords.offset + ords.length + 1);
ords.longs[ords.offset + ords.length++] = firstOrd;
if (firstNextLevelSlices == null) {
return;
}
long sliceID = firstNextLevelSlices.get(docID);
if (sliceID == 0L) {
return;
}
// Other levels
for (int level = 1; ; ++level) {
final int numSlots = numSlots(level);
ords.longs = ArrayUtil.grow(ords.longs, ords.offset + ords.length + numSlots);
final long offset = startOffset(level, sliceID);
for (int j = 0; j < numSlots; ++j) {
final long ord = ordinals[level].get(offset + j);
if (ord == 0L) {
return;
}
ords.longs[ords.offset + ords.length++] = ord;
}
if (nextLevelSlices[level] == null) {
return;
}
sliceID = nextLevelSlices[level].get(sliceID);
if (sliceID == 0L) {
return;
}
}
}
}
private final int maxDoc;
private long currentOrd = 0;
private int numDocsWithValue = 0;
private int numMultiValuedDocs = 0;
private int totalNumOrds = 0;
public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, Allocator allocator, float acceptableOverheadRatio) throws IOException {
private OrdinalsStore ordinals;
private final LongsRef spare;
public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException {
this.maxDoc = maxDoc;
if (preDefineBitsRequired) {
int numTerms = (int) terms.size();
if (numTerms == -1) {
svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio);
} else {
svOrds = new GrowableWriter(PackedInts.bitsRequired(numTerms), maxDoc, acceptableOverheadRatio);
int startBitsPerValue = 8;
if (numTerms >= 0) {
startBitsPerValue = PackedInts.bitsRequired(numTerms);
}
} else {
svOrds = new GrowableWriter(1, maxDoc, acceptableOverheadRatio);
ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio);
spare = new LongsRef();
}
pool = new IntBlockPool(allocator);
reader = new IntBlockPool.SliceReader(pool);
writer = new IntBlockPool.SliceWriter(pool);
public OrdinalsBuilder(int maxDoc, float acceptableOverheadRatio) throws IOException {
this(-1, maxDoc, acceptableOverheadRatio);
}
public OrdinalsBuilder(int maxDoc) throws IOException {
this(null, false, maxDoc, PackedInts.DEFAULT);
this(maxDoc, DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
}
public OrdinalsBuilder(Terms terms, boolean preDefineBitsRequired, int maxDoc, float acceptableOverheadRatio) throws IOException {
this(terms, preDefineBitsRequired, maxDoc, new DirectAllocator(), acceptableOverheadRatio);
/**
* Returns a shared {@link LongsRef} instance for the given doc ID holding all ordinals associated with it.
*/
public LongsRef docOrds(int docID) {
spare.offset = spare.length = 0;
ordinals.appendOrdinals(docID, spare);
return spare;
}
public OrdinalsBuilder(Terms terms, int maxDoc, float acceptableOverheadRatio) throws IOException {
this(terms, true, maxDoc, new DirectAllocator(), acceptableOverheadRatio);
/** Return a {@link PackedInts.Reader} instance mapping every doc ID to its first ordinal if it exists and 0 otherwise. */
public PackedInts.Reader getFirstOrdinals() {
return ordinals.firstOrdinals;
}
/**
* Advances the {@link OrdinalsBuilder} to the next ordinal and
* returns the current ordinal.
*/
public int nextOrdinal() {
public long nextOrdinal() {
return ++currentOrd;
}
@ -96,7 +292,7 @@ public final class OrdinalsBuilder implements Closeable {
* Returns the current ordinal or <tt>0</tt> if this builder has not been advanced via
* {@link #nextOrdinal()}.
*/
public int currentOrdinal() {
public long currentOrdinal() {
return currentOrd;
}
@ -105,42 +301,11 @@ public final class OrdinalsBuilder implements Closeable {
*/
public OrdinalsBuilder addDoc(int doc) {
totalNumOrds++;
if (svOrds != null) {
int docsOrd = (int) svOrds.get(doc);
if (docsOrd == 0) {
svOrds.set(doc, currentOrd);
numDocsWithValue++;
} else {
// Rebuilding the ords so that they support multi-values, based on the single-valued ords.
mvOrds = new int[maxDoc];
for (int docId = 0; docId < maxDoc; docId++) {
mvOrds[docId] = (int) svOrds.get(docId);
}
svOrds = null;
}
}
if (mvOrds != null) {
int docsOrd = mvOrds[doc];
if (docsOrd == 0) {
mvOrds[doc] = currentOrd;
numDocsWithValue++;
} else if (docsOrd > 0) {
numMultiValuedDocs++;
int offset = writer.startNewSlice();
writer.writeInt(docsOrd);
writer.writeInt(currentOrd);
if (offsets == null) {
offsets = new int[mvOrds.length];
}
offsets[doc] = writer.getCurrentOffset();
mvOrds[doc] = (-1 * offset) - 1;
} else {
assert offsets != null;
writer.reset(offsets[doc]);
writer.writeInt(currentOrd);
offsets[doc] = writer.getCurrentOffset();
}
final int numValues = ordinals.addOrdinal(doc, currentOrd);
if (numValues == 1) {
++numDocsWithValue;
} else if (numValues == 2) {
++numMultiValuedDocs;
}
return this;
}
@ -149,7 +314,7 @@ public final class OrdinalsBuilder implements Closeable {
* Returns <code>true</code> iff this builder contains a document ID that is associated with more than one ordinal, <code>false</code> otherwise.
*/
public boolean isMultiValued() {
return offsets != null;
return numMultiValuedDocs > 0;
}
/**
@ -183,7 +348,7 @@ public final class OrdinalsBuilder implements Closeable {
/**
* Returns the number of distinct ordinals in this builder.
*/
public int getNumOrds() {
public long getNumOrds() {
return currentOrd;
}
@ -196,18 +361,9 @@ public final class OrdinalsBuilder implements Closeable {
return null;
}
final FixedBitSet bitSet = new FixedBitSet(maxDoc);
if (svOrds != null) {
for (int docId = 0; docId < maxDoc; docId++) {
int ord = (int) svOrds.get(docId);
if (ord != 0) {
bitSet.set(docId);
}
}
} else {
for (int docId = 0; docId < maxDoc; docId++) {
if (mvOrds[docId] != 0) {
bitSet.set(docId);
}
for (int docID = 0; docID < maxDoc; ++docID) {
if (ordinals.firstOrdinals.get(docID) != 0) {
bitSet.set(docID);
}
}
return bitSet;
@ -217,72 +373,15 @@ public final class OrdinalsBuilder implements Closeable {
* Builds an {@link Ordinals} instance from the builders current state.
*/
public Ordinals build(Settings settings) {
if (numMultiValuedDocs == 0) {
return new SinglePackedOrdinals(svOrds.getMutable(), getNumOrds());
}
final String multiOrdinals = settings.get("multi_ordinals", "sparse");
if ("flat".equals(multiOrdinals)) {
final ArrayList<int[]> ordinalBuffer = new ArrayList<int[]>();
for (int i = 0; i < mvOrds.length; i++) {
final IntsRef docOrds = docOrds(i);
while (ordinalBuffer.size() < docOrds.length) {
ordinalBuffer.add(new int[mvOrds.length]);
}
for (int j = docOrds.offset; j < docOrds.offset+docOrds.length; j++) {
ordinalBuffer.get(j)[i] = docOrds.ints[j];
}
}
int[][] nativeOrdinals = new int[ordinalBuffer.size()][];
for (int i = 0; i < nativeOrdinals.length; i++) {
nativeOrdinals[i] = ordinalBuffer.get(i);
}
return new MultiFlatArrayOrdinals(nativeOrdinals, getNumOrds());
} else if ("sparse".equals(multiOrdinals)) {
int multiOrdinalsMaxDocs = settings.getAsInt("multi_ordinals_max_docs", 16777216 /* Equal to 64MB per storage array */);
return new SparseMultiArrayOrdinals(this, multiOrdinalsMaxDocs);
final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.COMPACT);
if (numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getNumOrds())) {
// MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields
return new MultiOrdinals(this);
} else {
throw new ElasticSearchIllegalArgumentException("no applicable fielddata multi_ordinals value, got [" + multiOrdinals + "]");
return new SinglePackedOrdinals(this, acceptableOverheadRatio);
}
}
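The heuristic MultiOrdinals.significantlySmallerThanSinglePackedOrdinals is referenced here but not shown in this excerpt. As a hypothetical stand-in (name, factor and logic are illustrative assumptions, not the actual implementation), the idea is that SinglePackedOrdinals pays bitsRequired(numOrds) bits for every document, while a multi-valued layout pays roughly per document that actually has a value, which can be much cheaper for sparse fields:

import org.apache.lucene.util.packed.PackedInts;

// Hypothetical sketch, NOT the actual MultiOrdinals heuristic.
final class OrdinalsSizeHeuristic {
    static boolean multiLikelySmaller(int maxDoc, int numDocsWithValue, long numOrds) {
        final int bitsPerOrd = PackedInts.bitsRequired(numOrds);
        final long singlePackedBits = (long) maxDoc * bitsPerOrd;          // dense: every doc pays
        final long multiBitsEstimate = 2L * numDocsWithValue * bitsPerOrd; // assumed 2x per valued doc
        return multiBitsEstimate < singlePackedBits;
    }
}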
/**
* Returns a shared {@link IntsRef} instance for the given doc ID holding all ordinals associated with it.
*/
public IntsRef docOrds(int doc) {
if (svOrds != null) {
int docsOrd = (int) svOrds.get(doc);
intsRef.offset = 0;
if (docsOrd == 0) {
intsRef.length = 0;
} else if (docsOrd > 0) {
intsRef.ints[0] = docsOrd;
intsRef.length = 1;
}
} else {
int docsOrd = mvOrds[doc];
intsRef.offset = 0;
if (docsOrd == 0) {
intsRef.length = 0;
} else if (docsOrd > 0) {
intsRef.ints[0] = mvOrds[doc];
intsRef.length = 1;
} else {
assert offsets != null;
reader.reset(-1 * (mvOrds[doc] + 1), offsets[doc]);
int pos = 0;
while (!reader.endOfSlice()) {
if (intsRef.ints.length <= pos) {
intsRef.ints = ArrayUtil.grow(intsRef.ints, pos + 1);
}
intsRef.ints[pos++] = reader.readInt();
}
intsRef.length = pos;
}
}
return intsRef;
}
/**
* Returns the maximum document ID this builder can associate with an ordinal
*/
@ -364,7 +463,6 @@ public final class OrdinalsBuilder implements Closeable {
*/
@Override
public void close() throws IOException {
pool.reset(true, false);
offsets = null;
ordinals = null;
}
}
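For reference, here is a minimal usage sketch of the builder API shown above (a hypothetical harness; the doc IDs and per-term postings are made up, a real loader would pull them from a TermsEnum):

import java.io.IOException;
import org.apache.lucene.util.LongsRef;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;

// Sketch: one nextOrdinal() per term (in term order), then addDoc() per matching doc.
final class OrdinalsBuilderUsage {
    static void example() throws IOException {
        final int maxDoc = 3;
        final int[][] postings = { {0, 2}, {1}, {2} }; // docs per term, in term order
        final OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc);
        try {
            for (int[] docs : postings) {
                builder.nextOrdinal();   // ordinals start at 1; 0 means "no value"
                for (int doc : docs) {
                    builder.addDoc(doc); // associates the current ordinal with doc
                }
            }
            final LongsRef ords = builder.docOrds(2); // shared instance: [1, 3] here
            assert ords.length == 2;
            // builder.build(settings) would then pick the smallest Ordinals representation
        } finally {
            builder.close();
        }
    }
}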

View File

@ -1,146 +0,0 @@
package org.elasticsearch.index.fielddata.ordinals;
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IntsRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.RamUsage;
/**
* An efficient store for positive integer slices. This pool uses multiple
* sliced arrays to hold integers in int array pages rather than object-based
* datastructures.
*/
final class PositiveIntPool {
// TODO it might be useful to store the size of the slices in a separate
// datastructure rather than using a negative value to denote this.
private final int blockShift;
private final int blockMask;
private final int blockSize;
/**
* array of buffers currently used in the pool. Buffers are allocated if
* needed; don't modify this outside of this class
*/
private int[][] buffers = new int[10][];
/**
* index into the buffers array pointing to the current buffer used as the
* head
*/
private int bufferUpto = -1;
/** Pointer to the current position in head buffer */
private int intUpto;
/** Current head buffer */
private int[] buffer;
/** Current head offset */
private int intOffset;
/**
* Creates a new {@link PositiveIntPool} with the given blockShift.
*
* @param blockShift
* the n-th power of two indicating the size of each block in
* the paged datastructure. BlockSize = 1 << blockShift
*/
public PositiveIntPool(int blockShift) {
this.blockShift = blockShift;
this.blockSize = 1 << blockShift;
this.blockMask = blockSize - 1;
this.intUpto = blockSize;
this.intOffset = -blockSize;
}
/**
* Adds all integers in the given slice and returns the positive offset
* into the datastructure to retrieve this slice.
*/
public int put(IntsRef slice) {
if (slice.length > blockSize) {
throw new ElasticSearchIllegalArgumentException("Cannot store slices larger than: " + blockSize);
}
if ((intUpto + slice.length) > blockSize) {
nextBuffer();
}
final int relativeOffset = intUpto;
System.arraycopy(slice.ints, slice.offset, buffer, relativeOffset, slice.length);
intUpto += slice.length;
buffer[intUpto - 1] *= -1; // mark as end
return relativeOffset + intOffset;
}
/**
* Returns the first value of the slice stored at the given offset.
* <p>
* Note: the slice length must be greater than one, otherwise the returned
* value is the negative complement of the actual value
* </p>
*/
public int getFirstFromOffset(int offset) {
final int blockOffset = offset >> blockShift;
final int relativeOffset = offset & blockMask;
final int[] currentBuffer = buffers[blockOffset];
assert currentBuffer[relativeOffset] >= 0;
return currentBuffer[relativeOffset];
}
/**
* Retrieves a previously stored slice from the pool.
*
* @param slice the slice to fill
* @param offset the offset where the slice is stored
*/
public void fill(IntsRef slice, int offset) {
final int blockOffset = offset >> blockShift;
final int relativeOffset = offset & blockMask;
final int[] currentBuffer = buffers[blockOffset];
slice.offset = 0;
slice.length = 0;
for (int i = relativeOffset; i < currentBuffer.length; i++) {
slice.length++;
if (currentBuffer[i] < 0) {
break;
}
}
if (slice.length != 0) {
slice.ints = ArrayUtil.grow(slice.ints, slice.length);
System.arraycopy(currentBuffer, relativeOffset, slice.ints, 0, slice.length);
slice.ints[slice.length-1] *= -1;
}
}
public long getMemorySizeInBytes() {
return ((bufferUpto + 1) * blockSize * RamUsage.NUM_BYTES_INT) + ((bufferUpto + 1) * RamUsage.NUM_BYTES_ARRAY_HEADER);
}
private void nextBuffer() {
if (1 + bufferUpto == buffers.length) {
int[][] newBuffers = new int[(int) (buffers.length * 1.5)][];
System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
buffers = newBuffers;
}
buffer = buffers[1 + bufferUpto] = new int[blockSize];
bufferUpto++;
intUpto = 0;
intOffset += blockSize;
}
}
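The end-marker trick of this (now removed) pool, negating the last element of each slice, can be shown in isolation; a simplified sketch (illustrative only):

import java.util.Arrays;

// Sketch of the removed pool's encoding: slices of positive ints are stored back to
// back and the last element of each slice is negated to mark the slice boundary.
final class EndMarkerDemo {
    public static void main(String[] args) {
        final int[] buffer = new int[16];
        int upto = 0;
        for (int v : new int[] {2, 3, 4}) { // store the slice [2, 3, 4] at offset 0
            buffer[upto++] = v;
        }
        buffer[upto - 1] *= -1; // mark as end, as put() does above

        int length = 0;         // read it back: scan until the negated element
        while (buffer[length++] >= 0) { /* advance */ }
        final int[] slice = Arrays.copyOfRange(buffer, 0, length);
        slice[length - 1] *= -1; // undo the marker
        System.out.println(Arrays.toString(slice)); // [2, 3, 4]
    }
}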

View File

@ -19,7 +19,7 @@
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.RamUsage;
@ -29,15 +29,19 @@ public class SinglePackedOrdinals implements Ordinals {
// an ordinal of 0 indicates no value
private final PackedInts.Reader reader;
private final int numOrds;
private final int maxOrd;
private final long numOrds;
private final long maxOrd;
private long size = -1;
public SinglePackedOrdinals(PackedInts.Reader reader, int numOrds) {
public SinglePackedOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) {
assert builder.getNumMultiValuesDocs() == 0;
this.numOrds = builder.getNumOrds();
this.maxOrd = builder.getNumOrds() + 1;
// We don't reuse the builder as-is because it might have been built with a higher overhead ratio
final PackedInts.Mutable reader = PackedInts.getMutable(builder.maxDoc(), PackedInts.bitsRequired(getNumOrds()), acceptableOverheadRatio);
PackedInts.copy(builder.getFirstOrdinals(), 0, reader, 0, builder.maxDoc(), 8 * 1024);
this.reader = reader;
this.numOrds = numOrds;
this.maxOrd = numOrds + 1;
}
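The repack performed by this constructor can be reproduced standalone; a small sketch using the same Lucene PackedInts calls (illustrative values):

import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;

// Sketch: compact a structure built with a generous overhead ratio, as the
// SinglePackedOrdinals constructor above does with the builder's first ordinals.
final class RepackDemo {
    public static void main(String[] args) {
        final int valueCount = 1000;
        final GrowableWriter fast = new GrowableWriter(1, valueCount, PackedInts.FAST);
        for (int i = 0; i < valueCount; ++i) {
            fast.set(i, i % 17); // small values, but FAST may round bits per value up
        }
        final PackedInts.Mutable compact = PackedInts.getMutable(
                valueCount, PackedInts.bitsRequired(16), PackedInts.COMPACT);
        PackedInts.copy(fast, 0, compact, 0, valueCount, 8 * 1024); // 8KB copy buffer
        System.out.println(compact.get(42)); // 42 % 17 = 8
    }
}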
@Override
@ -72,12 +76,12 @@ public class SinglePackedOrdinals implements Ordinals {
}
@Override
public int getNumOrds() {
public long getNumOrds() {
return numOrds;
}
@Override
public int getMaxOrd() {
public long getMaxOrd() {
return maxOrd;
}
@ -91,7 +95,7 @@ public class SinglePackedOrdinals implements Ordinals {
private final SinglePackedOrdinals parent;
private final PackedInts.Reader reader;
private final IntsRef intsScratch = new IntsRef(1);
private final LongsRef longsScratch = new LongsRef(1);
private final SingleValueIter iter = new SingleValueIter();
public Docs(SinglePackedOrdinals parent, PackedInts.Reader reader) {
@ -110,12 +114,12 @@ public class SinglePackedOrdinals implements Ordinals {
}
@Override
public int getNumOrds() {
public long getNumOrds() {
return parent.getNumOrds();
}
@Override
public int getMaxOrd() {
public long getMaxOrd() {
return parent.getMaxOrd();
}
@ -125,21 +129,21 @@ public class SinglePackedOrdinals implements Ordinals {
}
@Override
public int getOrd(int docId) {
return (int) reader.get(docId);
public long getOrd(int docId) {
return reader.get(docId);
}
@Override
public IntsRef getOrds(int docId) {
final int ordinal = (int) reader.get(docId);
public LongsRef getOrds(int docId) {
final long ordinal = reader.get(docId);
if (ordinal == 0) {
intsScratch.length = 0;
longsScratch.length = 0;
} else {
intsScratch.offset = 0;
intsScratch.length = 1;
intsScratch.ints[0] = ordinal;
longsScratch.offset = 0;
longsScratch.length = 1;
longsScratch.longs[0] = ordinal;
}
return intsScratch;
return longsScratch;
}
@Override

View File

@ -1,216 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.util.IntsRef;
import org.elasticsearch.common.RamUsage;
/**
* Ordinals implementation that stores the ordinals in sparse fixed arrays.
* <p/>
* This prevents the large ordinal arrays that are created, for example, by {@link MultiFlatArrayOrdinals} when
* only a few documents have many terms per field.
*/
public final class SparseMultiArrayOrdinals implements Ordinals {
private final int[] lookup;
private final PositiveIntPool pool;
private final int numOrds;
private final int maxOrd;
private final int numDocs;
private long size = -1;
public SparseMultiArrayOrdinals(OrdinalsBuilder builder, int maxSize) {
int blockShift = Math.min(floorPow2(builder.getTotalNumOrds() << 1), floorPow2(maxSize));
this.pool = new PositiveIntPool(Math.max(4, blockShift));
this.numDocs = builder.maxDoc();
this.lookup = new int[numDocs];
this.numOrds = builder.getNumOrds();
this.maxOrd = numOrds + 1;
IntsRef spare;
for (int doc = 0; doc < numDocs; doc++) {
spare = builder.docOrds(doc);
final int size = spare.length;
if (size == 0) {
lookup[doc] = 0;
} else if (size == 1) {
lookup[doc] = spare.ints[spare.offset];
} else {
int offset = pool.put(spare);
lookup[doc] = -(offset) - 1;
}
}
}
private static int floorPow2(int number) {
return 31 - Integer.numberOfLeadingZeros(number);
}
@Override
public boolean hasSingleArrayBackingStorage() {
return false;
}
@Override
public Object getBackingStorage() {
return null;
}
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = (RamUsage.NUM_BYTES_ARRAY_HEADER + (RamUsage.NUM_BYTES_INT * lookup.length)) + pool.getMemorySizeInBytes();
}
return size;
}
@Override
public boolean isMultiValued() {
return true;
}
@Override
public int getNumDocs() {
return numDocs;
}
@Override
public int getNumOrds() {
return numOrds;
}
@Override
public int getMaxOrd() {
return maxOrd;
}
@Override
public Docs ordinals() {
return new Docs(this, lookup, pool);
}
static class Docs implements Ordinals.Docs {
private final SparseMultiArrayOrdinals parent;
private final int[] lookup;
private final IterImpl iter;
private final PositiveIntPool pool;
private final IntsRef spare = new IntsRef(1);
public Docs(SparseMultiArrayOrdinals parent, int[] lookup, PositiveIntPool pool) {
this.parent = parent;
this.lookup = lookup;
this.pool = pool;
this.iter = new IterImpl(lookup, pool);
}
@Override
public Ordinals ordinals() {
return this.parent;
}
@Override
public int getNumDocs() {
return parent.getNumDocs();
}
@Override
public int getNumOrds() {
return parent.getNumOrds();
}
@Override
public int getMaxOrd() {
return parent.getMaxOrd();
}
@Override
public boolean isMultiValued() {
return true;
}
@Override
public int getOrd(int docId) {
int pointer = lookup[docId];
if (pointer < 0) {
return pool.getFirstFromOffset(-(pointer + 1));
}
return pointer;
}
@Override
public IntsRef getOrds(int docId) {
spare.offset = 0;
int pointer = lookup[docId];
if (pointer == 0) {
spare.length = 0;
} else if (pointer > 0) {
spare.length = 1;
spare.ints[0] = pointer;
return spare;
} else {
pool.fill(spare, -(pointer + 1));
return spare;
}
return spare;
}
@Override
public Iter getIter(int docId) {
return iter.reset(docId);
}
class IterImpl implements Docs.Iter {
private final int[] lookup;
private final PositiveIntPool pool;
private final IntsRef slice = new IntsRef(1);
private int valuesOffset;
public IterImpl(int[] lookup, PositiveIntPool pool) {
this.lookup = lookup;
this.pool = pool;
}
public IterImpl reset(int docId) {
final int pointer = lookup[docId];
if (pointer < 0) {
pool.fill(slice, -(pointer + 1));
} else {
slice.ints[0] = pointer;
slice.offset = 0;
slice.length = 1;
}
valuesOffset = 0;
return this;
}
@Override
public int next() {
if (valuesOffset >= slice.length) {
return 0;
}
return slice.ints[slice.offset + (valuesOffset++)];
}
}
}
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.util.BigDoubleArrayList;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
@ -30,14 +31,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
public static final DoubleArrayAtomicFieldData EMPTY = new Empty();
protected final double[] values;
private final int numDocs;
protected long size = -1;
public DoubleArrayAtomicFieldData(double[] values, int numDocs) {
public DoubleArrayAtomicFieldData(int numDocs) {
super(true);
this.values = values;
this.numDocs = numDocs;
}
@ -53,7 +52,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
static class Empty extends DoubleArrayAtomicFieldData {
Empty() {
super(null, 0);
super(0);
}
@Override
@ -94,10 +93,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
public static class WithOrdinals extends DoubleArrayAtomicFieldData {
private final BigDoubleArrayList values;
private final Ordinals ordinals;
public WithOrdinals(double[] values, int numDocs, Ordinals ordinals) {
super(values, numDocs);
public WithOrdinals(BigDoubleArrayList values, int numDocs, Ordinals ordinals) {
super(numDocs);
this.values = values;
this.ordinals = ordinals;
}
@ -114,7 +115,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE) + ordinals.getMemorySizeInBytes();
size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes();
}
return size;
}
@ -133,31 +134,31 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {
private final double[] values;
private final BigDoubleArrayList values;
LongValues(double[] values, Ordinals.Docs ordinals) {
LongValues(BigDoubleArrayList values, Ordinals.Docs ordinals) {
super(ordinals);
this.values = values;
}
@Override
public final long getValueByOrd(int ord) {
return (long) values[ord];
public final long getValueByOrd(long ord) {
return (long) values.get(ord);
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {
private final double[] values;
private final BigDoubleArrayList values;
DoubleValues(double[] values, Ordinals.Docs ordinals) {
DoubleValues(BigDoubleArrayList values, Ordinals.Docs ordinals) {
super(ordinals);
this.values = values;
}
@Override
public double getValueByOrd(int ord) {
return values[ord];
public double getValueByOrd(long ord) {
return values.get(ord);
}
}
}
@ -168,10 +169,12 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
*/
public static class SingleFixedSet extends DoubleArrayAtomicFieldData {
private final BigDoubleArrayList values;
private final FixedBitSet set;
public SingleFixedSet(double[] values, int numDocs, FixedBitSet set) {
super(values, numDocs);
public SingleFixedSet(BigDoubleArrayList values, int numDocs, FixedBitSet set) {
super(numDocs);
this.values = values;
this.set = set;
}
@ -188,7 +191,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
}
return size;
}
@ -205,10 +208,10 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
static class LongValues extends org.elasticsearch.index.fielddata.LongValues {
private final double[] values;
private final BigDoubleArrayList values;
private final FixedBitSet set;
LongValues(double[] values, FixedBitSet set) {
LongValues(BigDoubleArrayList values, FixedBitSet set) {
super(false);
this.values = values;
this.set = set;
@ -221,16 +224,16 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
@Override
public long getValue(int docId) {
return (long) values[docId];
return (long) values.get(docId);
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {
private final double[] values;
private final BigDoubleArrayList values;
private final FixedBitSet set;
DoubleValues(double[] values, FixedBitSet set) {
DoubleValues(BigDoubleArrayList values, FixedBitSet set) {
super(false);
this.values = values;
this.set = set;
@ -243,7 +246,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
@Override
public double getValue(int docId) {
return values[docId];
return values.get(docId);
}
}
@ -254,12 +257,15 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
*/
public static class Single extends DoubleArrayAtomicFieldData {
private final BigDoubleArrayList values;
/**
* Note that here we assume there is no offset by 1 from the docId, so position 0
* holds the value for docId 0.
*/
public Single(double[] values, int numDocs) {
super(values, numDocs);
public Single(BigDoubleArrayList values, int numDocs) {
super(numDocs);
this.values = values;
}
@Override
@ -275,7 +281,7 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_DOUBLE);
size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes();
}
return size;
}
@ -292,32 +298,32 @@ public abstract class DoubleArrayAtomicFieldData extends AtomicNumericFieldData
static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {
private final double[] values;
private final BigDoubleArrayList values;
LongValues(double[] values) {
LongValues(BigDoubleArrayList values) {
super(false);
this.values = values;
}
@Override
public long getValue(int docId) {
return (long) values[docId];
return (long) values.get(docId);
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {
private final double[] values;
private final BigDoubleArrayList values;
DoubleValues(double[] values) {
DoubleValues(BigDoubleArrayList values) {
super(false);
this.values = values;
}
@Override
public double getValue(int docId) {
return values[docId];
return values.get(docId);
}
}
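BigDoubleArrayList itself is not shown in this commit excerpt; a simplified hypothetical sketch of such a paged list explains why it can be indexed by a long while a plain double[] is limited to Integer.MAX_VALUE entries:

import java.util.ArrayList;

// Hypothetical, simplified sketch of a paged growable list of doubles:
// values live in fixed-size pages, so indices can be longs.
final class PagedDoubleList {
    private static final int PAGE_SHIFT = 12; // 4096 doubles per page
    private static final int PAGE_MASK = (1 << PAGE_SHIFT) - 1;
    private final ArrayList<double[]> pages = new ArrayList<double[]>();
    private long size = 0;

    void add(double value) {
        final int page = (int) (size >>> PAGE_SHIFT);
        if (page == pages.size()) {
            pages.add(new double[1 << PAGE_SHIFT]); // allocate lazily, page by page
        }
        pages.get(page)[(int) (size & PAGE_MASK)] = value;
        ++size;
    }

    double get(long index) {
        return pages.get((int) (index >>> PAGE_SHIFT))[(int) (index & PAGE_MASK)];
    }
}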

View File

@ -19,7 +19,6 @@
package org.elasticsearch.index.fielddata.plain;
import gnu.trove.list.array.TDoubleArrayList;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Terms;
@ -27,11 +26,11 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.BigDoubleArrayList;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
@ -49,7 +48,7 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
public static class Builder implements IndexFieldData.Builder {
@Override
public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
public IndexFieldData<?> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
return new DoubleArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
}
}
@ -92,11 +91,11 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
return DoubleArrayAtomicFieldData.EMPTY;
}
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
final TDoubleArrayList values = new TDoubleArrayList();
final BigDoubleArrayList values = new BigDoubleArrayList();
values.add(0); // ord 0 (the first slot) indicates a null value
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio);
try {
final BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
BytesRef term;
@ -113,23 +112,23 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData<DoubleArra
long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_DOUBLE;
long ordinalsSize = build.getMemorySizeInBytes();
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
return new DoubleArrayAtomicFieldData.WithOrdinals(values.toArray(new double[values.size()]), reader.maxDoc(), build);
return new DoubleArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
}
double[] sValues = new double[reader.maxDoc()];
int maxDoc = reader.maxDoc();
BigDoubleArrayList sValues = new BigDoubleArrayList(maxDoc);
for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i));
sValues.add(values.get(ordinals.getOrd(i)));
}
assert sValues.size() == maxDoc;
if (set == null) {
return new DoubleArrayAtomicFieldData.Single(sValues, reader.maxDoc());
return new DoubleArrayAtomicFieldData.Single(sValues, maxDoc);
} else {
return new DoubleArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
return new DoubleArrayAtomicFieldData.SingleFixedSet(sValues, maxDoc, set);
}
} else {
return new DoubleArrayAtomicFieldData.WithOrdinals(
values.toArray(new double[values.size()]),
values,
reader.maxDoc(),
build);
}
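The layout decision above boils down to one comparison; the sketch below replays the same arithmetic with made-up sizes (illustrative numbers, not measurements):

// Illustrative arithmetic only: mirrors the dense-vs-ordinals decision above.
final class LayoutDecisionDemo {
    public static void main(String[] args) {
        final long NUM_BYTES_DOUBLE = 8;
        final int maxDoc = 1000000;
        final long numUniqueValues = 1000;   // few distinct values
        final long ordinalsSize = 2000000;   // pretend the Ordinals report ~2MB

        final long singleValuesArraySize = maxDoc * NUM_BYTES_DOUBLE;          // 8MB dense
        final long uniqueValuesArraySize = numUniqueValues * NUM_BYTES_DOUBLE; // 8KB
        if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
            System.out.println("keep ordinals: " + (uniqueValuesArraySize + ordinalsSize) + " bytes");
        } else {
            System.out.println("materialize one value per doc: " + singleValuesArraySize + " bytes");
        }
    }
}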

View File

@ -21,12 +21,10 @@ package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.*;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.Util;
import org.elasticsearch.common.util.BigIntArray;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
@ -46,7 +44,7 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
// 0 ordinal in values means no value (it's null)
protected final Ordinals ordinals;
private volatile int[] hashes;
private volatile BigIntArray hashes;
private long size = -1;
private final FST<Long> fst;
@ -104,18 +102,17 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
assert fst != null;
if (hashes == null) {
BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
int[] hashes = new int[ordinals.getMaxOrd()];
InputOutput<Long> next;
BigIntArray hashes = new BigIntArray(ordinals.getMaxOrd());
// we don't store an ord 0 in the FST since we could have an empty string in there and FSTs don't support
// empty strings twice, i.e. the merge fails for long outputs.
hashes[0] = new BytesRef().hashCode();
int i = 1;
hashes.set(0, new BytesRef().hashCode());
try {
while ((next = fstEnum.next()) != null) {
hashes[i++] = next.input.hashCode();
for (long i = 1, maxOrd = ordinals.getMaxOrd(); i < maxOrd; ++i) {
hashes.set(i, fstEnum.next().input.hashCode());
}
} catch (IOException ex) {
//bogus
assert fstEnum.next() == null;
} catch (IOException e) {
throw new AssertionError("Cannot happen", e);
}
this.hashes = hashes;
}
@ -141,7 +138,7 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
}
@Override
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
public BytesRef getValueScratchByOrd(long ord, BytesRef ret) {
if (ord == 0) {
ret.length = 0;
return ret;
@ -170,16 +167,16 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
long ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
return iter.reset(getValueByOrd(ord), ord);
}
}
static final class SingleHashed extends Single {
private final int[] hashes;
private final BigIntArray hashes;
SingleHashed(FST<Long> fst, Docs ordinals, int[] hashes) {
SingleHashed(FST<Long> fst, Docs ordinals, BigIntArray hashes) {
super(fst, ordinals);
this.hashes = hashes;
}
@ -188,16 +185,16 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
protected Iter.Single newSingleIter() {
return new Iter.Single() {
public int hash() {
return hashes[ord];
return hashes.get(ord);
}
};
}
@Override
public int getValueHashed(int docId, BytesRef ret) {
final int ord = ordinals.getOrd(docId);
final long ord = ordinals.getOrd(docId);
getValueScratchByOrd(ord, ret);
return hashes[ord];
return hashes.get(ord);
}
}
@ -219,9 +216,9 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
static final class MultiHashed extends Multi {
private final int[] hashes;
private final BigIntArray hashes;
MultiHashed(FST<Long> fst, Docs ordinals, int[] hashes) {
MultiHashed(FST<Long> fst, Docs ordinals, BigIntArray hashes) {
super(fst, ordinals);
this.hashes = hashes;
}
@ -230,16 +227,16 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
protected Iter.Multi newMultiIter() {
return new Iter.Multi(this) {
public int hash() {
return hashes[ord];
return hashes.get(ord);
}
};
}
@Override
public int getValueHashed(int docId, BytesRef ret) {
final int ord = ordinals.getOrd(docId);
final long ord = ordinals.getOrd(docId);
getValueScratchByOrd(ord, ret);
return hashes[ord];
return hashes.get(ord);
}
}
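The lazy hash computation above enumerates the FST once in ordinal order; here is the same traversal in standalone form (a sketch using the Lucene enum API from the diff, with a plain int[] standing in for BigIntArray, so it is limited to 2B ordinals):

import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST;

// Sketch: precompute one hash per ordinal by walking the FST in order.
// Ord 0 is reserved for the missing value (the empty BytesRef), as in the diff.
final class FstHashes {
    static int[] computeHashes(FST<Long> fst, int maxOrd) throws IOException {
        final int[] hashes = new int[maxOrd];
        hashes[0] = new BytesRef().hashCode();
        final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
        for (int ord = 1; ord < maxOrd; ++ord) {
            final InputOutput<Long> next = fstEnum.next(); // terms come back in ord order
            hashes[ord] = next.input.hashCode();
        }
        return hashes;
    }
}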

View File

@ -26,7 +26,6 @@ import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.FieldDataType;
@ -65,9 +64,14 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
org.apache.lucene.util.fst.Builder<Long> fstBuilder = new org.apache.lucene.util.fst.Builder<Long>(INPUT_TYPE.BYTE1, outputs);
final IntsRef scratch = new IntsRef();
boolean preDefineBitsRequired = regex == null && frequency == null;
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio);
final long numTerms;
if (regex == null && frequency == null) {
numTerms = terms.size();
} else {
numTerms = -1;
}
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio);
try {
// we don't store an ord 0 in the FST since we could have an empty string in there and FSTs don't support
@ -75,7 +79,7 @@ public class FSTBytesIndexFieldData extends AbstractBytesIndexFieldData<FSTBytes
TermsEnum termsEnum = filter(terms, reader);
DocsEnum docsEnum = null;
for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
final int termOrd = builder.nextOrdinal();
final long termOrd = builder.nextOrdinal();
assert termOrd > 0;
fstBuilder.add(Util.toIntsRef(term, scratch), (long)termOrd);
docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
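The FST construction in this loop can be reproduced in isolation; a minimal sketch (assuming the Lucene 4.3 FST API, with hard-coded sorted terms in place of a real TermsEnum):

import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

// Sketch: map sorted terms to ordinals 1..N in an FST, reserving ord 0 for
// the missing value, mirroring the loop above.
final class TermOrdFst {
    static FST<Long> build(String... sortedTerms) throws IOException {
        final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
        final Builder<Long> fstBuilder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
        final IntsRef scratch = new IntsRef();
        long ord = 0;
        for (String term : sortedTerms) {
            ++ord; // ordinals start at 1
            fstBuilder.add(Util.toIntsRef(new BytesRef(term), scratch), ord);
        }
        return fstBuilder.finish();
    }
}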

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.util.BigFloatArrayList;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
@ -30,14 +31,12 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
public static final FloatArrayAtomicFieldData EMPTY = new Empty();
protected final float[] values;
private final int numDocs;
protected long size = -1;
public FloatArrayAtomicFieldData(float[] values, int numDocs) {
public FloatArrayAtomicFieldData(int numDocs) {
super(true);
this.values = values;
this.numDocs = numDocs;
}
@ -53,7 +52,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
static class Empty extends FloatArrayAtomicFieldData {
Empty() {
super(null, 0);
super(0);
}
@Override
@ -95,9 +94,11 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
public static class WithOrdinals extends FloatArrayAtomicFieldData {
private final Ordinals ordinals;
private final BigFloatArrayList values;
public WithOrdinals(float[] values, int numDocs, Ordinals ordinals) {
super(values, numDocs);
public WithOrdinals(BigFloatArrayList values, int numDocs, Ordinals ordinals) {
super(numDocs);
this.values = values;
this.ordinals = ordinals;
}
@ -114,7 +115,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT) + ordinals.getMemorySizeInBytes();
size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + values.sizeInBytes() + ordinals.getMemorySizeInBytes();
}
return size;
}
@ -131,31 +132,31 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
static class LongValues extends org.elasticsearch.index.fielddata.LongValues.WithOrdinals {
private final float[] values;
private final BigFloatArrayList values;
LongValues(float[] values, Ordinals.Docs ordinals) {
LongValues(BigFloatArrayList values, Ordinals.Docs ordinals) {
super(ordinals);
this.values = values;
}
@Override
public long getValueByOrd(int ord) {
return (long) values[ord];
public long getValueByOrd(long ord) {
return (long) values.get(ord);
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.WithOrdinals {
private final float[] values;
private final BigFloatArrayList values;
DoubleValues(float[] values, Ordinals.Docs ordinals) {
DoubleValues(BigFloatArrayList values, Ordinals.Docs ordinals) {
super(ordinals);
this.values = values;
}
@Override
public double getValueByOrd(int ord) {
return values[ord];
public double getValueByOrd(long ord) {
return values.get(ord);
}
}
}
@ -166,10 +167,12 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
*/
public static class SingleFixedSet extends FloatArrayAtomicFieldData {
private final BigFloatArrayList values;
private final FixedBitSet set;
public SingleFixedSet(float[] values, int numDocs, FixedBitSet set) {
super(values, numDocs);
public SingleFixedSet(BigFloatArrayList values, int numDocs, FixedBitSet set) {
super(numDocs);
this.values = values;
this.set = set;
}
@ -186,7 +189,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT) + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes() + (set.getBits().length * RamUsage.NUM_BYTES_LONG);
}
return size;
}
@ -204,10 +207,10 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
static class LongValues extends org.elasticsearch.index.fielddata.LongValues {
private final float[] values;
private final BigFloatArrayList values;
private final FixedBitSet set;
LongValues(float[] values, FixedBitSet set) {
LongValues(BigFloatArrayList values, FixedBitSet set) {
super(false);
this.values = values;
this.set = set;
@ -220,16 +223,16 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
@Override
public long getValue(int docId) {
return (long) values[docId];
return (long) values.get(docId);
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues {
private final float[] values;
private final BigFloatArrayList values;
private final FixedBitSet set;
DoubleValues(float[] values, FixedBitSet set) {
DoubleValues(BigFloatArrayList values, FixedBitSet set) {
super(false);
this.values = values;
this.set = set;
@ -242,7 +245,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
@Override
public double getValue(int docId) {
return (double) values[docId];
return (double) values.get(docId);
}
}
@ -254,12 +257,15 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
*/
public static class Single extends FloatArrayAtomicFieldData {
private final BigFloatArrayList values;
/**
* Note that here we assume there is no offset by 1 from the docId, so position 0
* holds the value for docId 0.
*/
public Single(float[] values, int numDocs) {
super(values, numDocs);
public Single(BigFloatArrayList values, int numDocs) {
super(numDocs);
this.values = values;
}
@Override
@ -275,7 +281,7 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_ARRAY_HEADER + (values.length * RamUsage.NUM_BYTES_FLOAT);
size = RamUsage.NUM_BYTES_ARRAY_HEADER + values.sizeInBytes();
}
return size;
}
@ -293,32 +299,32 @@ public abstract class FloatArrayAtomicFieldData extends AtomicNumericFieldData {
static class LongValues extends org.elasticsearch.index.fielddata.LongValues.Dense {
private final float[] values;
private final BigFloatArrayList values;
LongValues(float[] values) {
LongValues(BigFloatArrayList values) {
super(false);
this.values = values;
}
@Override
public long getValue(int docId) {
return (long) values[docId];
return (long) values.get(docId);
}
}
static class DoubleValues extends org.elasticsearch.index.fielddata.DoubleValues.Dense {
private final float[] values;
private final BigFloatArrayList values;
DoubleValues(float[] values) {
DoubleValues(BigFloatArrayList values) {
super(false);
this.values = values;
}
@Override
public double getValue(int docId) {
return (double) values[docId];
return (double) values.get(docId);
}
}
}

View File

@ -19,7 +19,6 @@
package org.elasticsearch.index.fielddata.plain;
import gnu.trove.list.array.TFloatArrayList;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Terms;
@ -27,11 +26,11 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.BigFloatArrayList;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
@ -49,7 +48,7 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
public static class Builder implements IndexFieldData.Builder {
@Override
public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
public IndexFieldData<?> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
return new FloatArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
}
}
@ -91,12 +90,12 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
return FloatArrayAtomicFieldData.EMPTY;
}
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
final TFloatArrayList values = new TFloatArrayList();
final BigFloatArrayList values = new BigFloatArrayList();
values.add(0); // ord 0 (the first slot) indicates a null value
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
OrdinalsBuilder builder = new OrdinalsBuilder(reader.maxDoc(), acceptableTransientOverheadRatio);
try {
BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
BytesRef term;
@ -110,25 +109,26 @@ public class FloatArrayIndexFieldData extends AbstractIndexFieldData<FloatArrayA
// there's a sweet spot where, due to a low unique value count, using ordinals will consume less memory
long singleValuesArraySize = reader.maxDoc() * RamUsage.NUM_BYTES_FLOAT + (set == null ? 0 : set.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_INT);
long uniqueValuesArraySize = values.size() * RamUsage.NUM_BYTES_FLOAT;
long uniqueValuesArraySize = values.sizeInBytes();
long ordinalsSize = build.getMemorySizeInBytes();
if (uniqueValuesArraySize + ordinalsSize < singleValuesArraySize) {
return new FloatArrayAtomicFieldData.WithOrdinals(values.toArray(new float[values.size()]), reader.maxDoc(), build);
return new FloatArrayAtomicFieldData.WithOrdinals(values, reader.maxDoc(), build);
}
float[] sValues = new float[reader.maxDoc()];
int maxDoc = reader.maxDoc();
BigFloatArrayList sValues = new BigFloatArrayList(maxDoc);
for (int i = 0; i < maxDoc; i++) {
sValues[i] = values.get(ordinals.getOrd(i));
sValues.add(values.get(ordinals.getOrd(i)));
}
assert sValues.size() == maxDoc;
if (set == null) {
return new FloatArrayAtomicFieldData.Single(sValues, reader.maxDoc());
return new FloatArrayAtomicFieldData.Single(sValues, maxDoc);
} else {
return new FloatArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set);
return new FloatArrayAtomicFieldData.SingleFixedSet(sValues, maxDoc, set);
}
} else {
return new FloatArrayAtomicFieldData.WithOrdinals(
values.toArray(new float[values.size()]),
values,
reader.maxDoc(),
build);
}

View File

@ -22,6 +22,7 @@ package org.elasticsearch.index.fielddata.plain;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.RamUsage;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.util.BigDoubleArrayList;
import org.elasticsearch.index.fielddata.AtomicGeoPointFieldData;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.GeoPointValues;
@ -34,15 +35,11 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
public static final GeoPointDoubleArrayAtomicFieldData EMPTY = new Empty();
protected final double[] lon;
protected final double[] lat;
private final int numDocs;
protected long size = -1;
public GeoPointDoubleArrayAtomicFieldData(double[] lon, double[] lat, int numDocs) {
this.lon = lon;
this.lat = lat;
public GeoPointDoubleArrayAtomicFieldData(int numDocs) {
this.numDocs = numDocs;
}
@ -63,7 +60,7 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
static class Empty extends GeoPointDoubleArrayAtomicFieldData {
Empty() {
super(null, null, 0);
super(0);
}
@Override
@ -99,10 +96,13 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
public static class WithOrdinals extends GeoPointDoubleArrayAtomicFieldData {
private final BigDoubleArrayList lon, lat;
private final Ordinals ordinals;
public WithOrdinals(double[] lon, double[] lat, int numDocs, Ordinals ordinals) {
super(lon, lat, numDocs);
public WithOrdinals(BigDoubleArrayList lon, BigDoubleArrayList lat, int numDocs, Ordinals ordinals) {
super(numDocs);
this.lon = lon;
this.lat = lat;
this.ordinals = ordinals;
}
@ -119,7 +119,7 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
@Override
public long getMemorySizeInBytes() {
if (size == -1) {
size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + (RamUsage.NUM_BYTES_ARRAY_HEADER + (lon.length * RamUsage.NUM_BYTES_DOUBLE)) + (RamUsage.NUM_BYTES_ARRAY_HEADER + (lat.length * RamUsage.NUM_BYTES_DOUBLE)) + ordinals.getMemorySizeInBytes();
size = RamUsage.NUM_BYTES_INT/*size*/ + RamUsage.NUM_BYTES_INT/*numDocs*/ + lon.sizeInBytes() + lat.sizeInBytes() + ordinals.getMemorySizeInBytes();
}
return size;
}
@ -131,15 +131,14 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
static class GeoPointValues implements org.elasticsearch.index.fielddata.GeoPointValues {
private final double[] lon;
private final double[] lat;
private final BigDoubleArrayList lon, lat;
private final Ordinals.Docs ordinals;
private final GeoPoint scratch = new GeoPoint();
private final ValuesIter valuesIter;
private final SafeValuesIter safeValuesIter;
GeoPointValues(double[] lon, double[] lat, Ordinals.Docs ordinals) {
GeoPointValues(BigDoubleArrayList lon, BigDoubleArrayList lat, Ordinals.Docs ordinals) {
this.lon = lon;
this.lat = lat;
this.ordinals = ordinals;
@ -159,20 +158,20 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
@Override
public GeoPoint getValue(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
long ord = ordinals.getOrd(docId);
if (ord == 0L) {
return null;
}
return scratch.reset(lat[ord], lon[ord]);
return scratch.reset(lat.get(ord), lon.get(ord));
}
@Override
public GeoPoint getValueSafe(int docId) {
int ord = ordinals.getOrd(docId);
if (ord == 0) {
long ord = ordinals.getOrd(docId);
if (ord == 0L) {
return null;
}
return new GeoPoint(lat[ord], lon[ord]);
return new GeoPoint(lat.get(ord), lon.get(ord));
}
@Override
@ -188,14 +187,13 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
static class ValuesIter implements Iter {
private final double[] lon;
private final double[] lat;
private final BigDoubleArrayList lon, lat;
private final GeoPoint scratch = new GeoPoint();
private Ordinals.Docs.Iter ordsIter;
private int ord;
private long ord;
ValuesIter(double[] lon, double[] lat) {
ValuesIter(BigDoubleArrayList lon, BigDoubleArrayList lat) {
this.lon = lon;
this.lat = lat;
}
@ -213,7 +211,7 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
@Override
public GeoPoint next() {
scratch.reset(lat[ord], lon[ord]);
scratch.reset(lat.get(ord), lon.get(ord));
ord = ordsIter.next();
return scratch;
}
@@ -221,13 +219,12 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
static class SafeValuesIter implements Iter {
private final double[] lon;
private final double[] lat;
private final BigDoubleArrayList lon, lat;
private Ordinals.Docs.Iter ordsIter;
private int ord;
private long ord;
SafeValuesIter(double[] lon, double[] lat) {
SafeValuesIter(BigDoubleArrayList lon, BigDoubleArrayList lat) {
this.lon = lon;
this.lat = lat;
}
@@ -245,7 +242,7 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
@Override
public GeoPoint next() {
GeoPoint value = new GeoPoint(lat[ord], lon[ord]);
GeoPoint value = new GeoPoint(lat.get(ord), lon.get(ord));
ord = ordsIter.next();
return value;
}
@@ -258,10 +255,13 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
*/
public static class SingleFixedSet extends GeoPointDoubleArrayAtomicFieldData {
private final double[] lon, lat;
private final FixedBitSet set;
public SingleFixedSet(double[] lon, double[] lat, int numDocs, FixedBitSet set) {
super(lon, lat, numDocs);
super(numDocs);
this.lon = lon;
this.lat = lat;
this.set = set;
}
@@ -357,8 +357,12 @@ public abstract class GeoPointDoubleArrayAtomicFieldData extends AtomicGeoPointF
*/
public static class Single extends GeoPointDoubleArrayAtomicFieldData {
private final double[] lon, lat;
public Single(double[] lon, double[] lat, int numDocs) {
super(lon, lat, numDocs);
super(numDocs);
this.lon = lon;
this.lat = lat;
}
@Override
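The hunks above replace raw double[] parallel arrays with BigDoubleArrayList so that latitudes and longitudes can be addressed by a long ordinal. A minimal sketch of such a paged double list, assuming a fixed page size; the names are illustrative, not the actual org.elasticsearch.common.util.BigDoubleArrayList:

import java.util.ArrayList;
import java.util.List;

/** Sketch of a growable double list addressable by a long index, stored in fixed-size pages. */
final class PagedDoubleList {
    private static final int PAGE_SHIFT = 12; // 4096 doubles per page (assumed)
    private static final int PAGE_SIZE = 1 << PAGE_SHIFT;
    private static final int PAGE_MASK = PAGE_SIZE - 1;

    private final List<double[]> pages = new ArrayList<double[]>();
    private long size = 0;

    void add(double value) {
        final int page = (int) (size >>> PAGE_SHIFT);
        if (page == pages.size()) {
            pages.add(new double[PAGE_SIZE]); // grow one page at a time
        }
        pages.get(page)[(int) (size & PAGE_MASK)] = value;
        size++;
    }

    double get(long index) {
        // split the long index into a page number and an offset within the page
        return pages.get((int) (index >>> PAGE_SHIFT))[(int) (index & PAGE_MASK)];
    }

    long size() {
        return size;
    }
}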

View File

@@ -19,16 +19,15 @@
package org.elasticsearch.index.fielddata.plain;
import gnu.trove.list.array.TDoubleArrayList;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.*;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.BigDoubleArrayList;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.*;
import org.elasticsearch.index.fielddata.fieldcomparator.SortMode;
@@ -45,7 +44,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
public static class Builder implements IndexFieldData.Builder {
@Override
public IndexFieldData build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
public IndexFieldData<?> build(Index index, @IndexSettings Settings indexSettings, FieldMapper.Names fieldNames, FieldDataType type, IndexFieldDataCache cache) {
return new GeoPointDoubleArrayIndexFieldData(index, indexSettings, fieldNames, type, cache);
}
}
@@ -83,12 +82,12 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
return GeoPointDoubleArrayAtomicFieldData.EMPTY;
}
// TODO: how can we guess the number of terms? numerics end up creating more terms per value...
final TDoubleArrayList lat = new TDoubleArrayList();
final TDoubleArrayList lon = new TDoubleArrayList();
final BigDoubleArrayList lat = new BigDoubleArrayList();
final BigDoubleArrayList lon = new BigDoubleArrayList();
lat.add(0); // first "t" indicates null value
lon.add(0); // first "t" indicates null value
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
OrdinalsBuilder builder = new OrdinalsBuilder(terms.size(), reader.maxDoc(), acceptableTransientOverheadRatio);
final CharsRef spare = new CharsRef();
try {
BytesRefIterator iter = builder.buildFromTerms(terms.iterator(null));
@@ -113,7 +112,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
double[] sLat = new double[reader.maxDoc()];
double[] sLon = new double[reader.maxDoc()];
for (int i = 0; i < sLat.length; i++) {
int nativeOrdinal = ordinals.getOrd(i);
long nativeOrdinal = ordinals.getOrd(i);
sLat[i] = lat.get(nativeOrdinal);
sLon[i] = lon.get(nativeOrdinal);
}
@@ -125,8 +124,7 @@ public class GeoPointDoubleArrayIndexFieldData extends AbstractIndexFieldData<Ge
}
} else {
return new GeoPointDoubleArrayAtomicFieldData.WithOrdinals(
lon.toArray(new double[lon.size()]),
lat.toArray(new double[lat.size()]),
lon, lat,
reader.maxDoc(), build);
}
} finally {
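The transient ratio only governs the OrdinalsBuilder's scratch structures while loading and is read from the field data settings with a default. A small sketch of that lookup; the 0.25 literal is an assumption standing in for OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO:

import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;

public class OverheadRatioSketch {
    public static void main(String[] args) {
        Settings settings = ImmutableSettings.builder()
                .put("acceptable_transient_overhead_ratio", 0.5f)
                .build();
        // falls back to the default when the key is absent from the field data settings
        float ratio = settings.getAsFloat("acceptable_transient_overhead_ratio", 0.25f);
        System.out.println(ratio); // 0.5
    }
}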

View File

@@ -141,7 +141,7 @@ public abstract class PackedArrayAtomicFieldData extends AtomicNumericFieldData
}
@Override
public long getValueByOrd(int ord) {
public long getValueByOrd(long ord) {
return ord == 0 ? 0L : values.get(ord - 1);
}
}
@@ -156,7 +156,7 @@ public abstract class PackedArrayAtomicFieldData extends AtomicNumericFieldData
}
@Override
public double getValueByOrd(int ord) {
public double getValueByOrd(long ord) {
return ord == 0 ? 0L : values.get(ord - 1);
}
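Both accessors rely on the convention that ordinal 0 means "document has no value" and real values start at ordinal 1, hence the ord - 1 offset into the packed values. A small sketch of that convention, assuming a MonotonicAppendingLongBuffer as backing store like in the hunks above; the sample values are made up:

import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

public class OrdinalZeroSketch {
    public static void main(String[] args) {
        MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();
        values.add(7);  // value for ordinal 1
        values.add(42); // value for ordinal 2

        long ord = 2;
        // ordinal 0 is "missing", so stored values are shifted down by one slot
        long value = ord == 0 ? 0L : values.get(ord - 1);
        System.out.println(value); // 42
    }
}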

View File

@@ -110,8 +110,8 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
// longs is going to be monotonically increasing
final MonotonicAppendingLongBuffer values = new MonotonicAppendingLongBuffer();
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, reader.maxDoc(), acceptableOverheadRatio);
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
OrdinalsBuilder builder = new OrdinalsBuilder(-1, reader.maxDoc(), acceptableTransientOverheadRatio);
try {
BytesRefIterator iter = builder.buildFromTerms(getNumericType().wrapTermsEnum(terms.iterator(null)));
BytesRef term;
@@ -161,6 +161,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
final long delta = maxValue - minValue;
final int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
// there's sweet spot where due to low unique value count, using ordinals will consume less memory
@@ -177,7 +178,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData<AtomicNume
sValues.fill(0, sValues.size(), missingValue);
}
for (int i = 0; i < reader.maxDoc(); i++) {
final int ord = ordinals.getOrd(i);
final long ord = ordinals.getOrd(i);
if (ord > 0) {
sValues.set(i, values.get(ord - 1) - minValue);
}
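Values are stored as offsets from minValue, so the per-entry width is driven by bitsRequired(maxValue - minValue), with a fallback to 64 bits when the delta overflows a signed long; PackedInts.fastestFormatAndBits then picks a storage format within the acceptable overhead. A standalone sketch of this sizing logic with made-up bounds:

import org.apache.lucene.util.packed.PackedInts;

public class PackedSizingSketch {
    public static void main(String[] args) {
        long minValue = 100L, maxValue = 100000L;
        int valueCount = 1000000; // e.g. reader.maxDoc()

        // if the delta overflows a signed long, fall back to full 64-bit entries
        long delta = maxValue - minValue;
        int bitsRequired = delta < 0 ? 64 : PackedInts.bitsRequired(delta);

        // trade up to the acceptable overhead for a faster (aligned) storage format
        PackedInts.FormatAndBits formatAndBits =
                PackedInts.fastestFormatAndBits(valueCount, bitsRequired, PackedInts.DEFAULT);
        System.out.println(formatAndBits.format + " with " + formatAndBits.bitsPerValue + " bits per value");
    }
}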

View File

@@ -23,6 +23,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.PagedBytes.Reader;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.elasticsearch.common.util.BigIntArray;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
@@ -42,7 +43,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
private final MonotonicAppendingLongBuffer termOrdToBytesOffset;
protected final Ordinals ordinals;
private volatile int[] hashes;
private volatile BigIntArray hashes;
private long size = -1;
private final long readerBytesSize;
@@ -85,14 +86,14 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
return size;
}
private final int[] getHashes() {
private final BigIntArray getHashes() {
if (hashes == null) {
int numberOfValues = (int) termOrdToBytesOffset.size();
int[] hashes = new int[numberOfValues];
long numberOfValues = termOrdToBytesOffset.size();
BigIntArray hashes = new BigIntArray(numberOfValues);
BytesRef scratch = new BytesRef();
for (int i = 0; i < numberOfValues; i++) {
for (long i = 0; i < numberOfValues; i++) {
bytes.fill(scratch, termOrdToBytesOffset.get(i));
hashes[i] = scratch.hashCode();
hashes.set(i, scratch.hashCode());
}
this.hashes = hashes;
}
@@ -107,7 +108,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
@Override
public org.elasticsearch.index.fielddata.BytesValues.WithOrdinals getHashedBytesValues() {
final int[] hashes = getHashes();
final BigIntArray hashes = getHashes();
return ordinals.isMultiValued() ? new BytesValues.MultiHashed(hashes, bytes, termOrdToBytesOffset, ordinals.ordinals())
: new BytesValues.SingleHashed(hashes, bytes, termOrdToBytesOffset, ordinals.ordinals());
}
@@ -145,7 +146,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
}
@Override
public BytesRef getValueScratchByOrd(int ord, BytesRef ret) {
public BytesRef getValueScratchByOrd(long ord, BytesRef ret) {
bytes.fill(ret, termOrdToBytesOffset.get(ord));
return ret;
}
@@ -163,7 +164,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
long ord = ordinals.getOrd(docId);
if (ord == 0) return Iter.Empty.INSTANCE;
bytes.fill(scratch, termOrdToBytesOffset.get(ord));
return iter.reset(scratch, ord);
@@ -172,9 +173,9 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
}
static final class SingleHashed extends Single {
private final int[] hashes;
private final BigIntArray hashes;
SingleHashed(int[] hashes, Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Docs ordinals) {
SingleHashed(BigIntArray hashes, Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
this.hashes = hashes;
}
@@ -183,16 +184,16 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
protected Iter.Single newSingleIter() {
return new Iter.Single() {
public int hash() {
return hashes[ord];
return hashes.get(ord);
}
};
}
@Override
public int getValueHashed(int docId, BytesRef ret) {
final int ord = ordinals.getOrd(docId);
final long ord = ordinals.getOrd(docId);
getValueScratchByOrd(ord, ret);
return hashes[ord];
return hashes.get(ord);
}
}
@@ -216,9 +217,9 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
static final class MultiHashed extends Multi {
private final int[] hashes;
private final BigIntArray hashes;
MultiHashed(int[] hashes, Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Docs ordinals) {
MultiHashed(BigIntArray hashes, Reader bytes, MonotonicAppendingLongBuffer termOrdToBytesOffset, Docs ordinals) {
super(bytes, termOrdToBytesOffset, ordinals);
this.hashes = hashes;
}
@@ -227,16 +228,16 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
protected Iter.Multi newMultiIter() {
return new Iter.Multi(this) {
public int hash() {
return hashes[ord];
return hashes.get(ord);
}
};
}
@Override
public int getValueHashed(int docId, BytesRef ret) {
int ord = ordinals.getOrd(docId);
long ord = ordinals.getOrd(docId);
getValueScratchByOrd(ord, ret);
return hashes[ord];
return hashes.get(ord);
}
}
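The hash cache above is built lazily and published through a volatile field: readers either see null and build it, or see a fully initialized cache, never a half-built one. A sketch of the pattern with a plain int[] for brevity; the class and field names are illustrative:

import org.apache.lucene.util.BytesRef;

final class LazyHashCache {
    private final BytesRef[] terms;
    private volatile int[] hashes; // published only once fully built

    LazyHashCache(BytesRef[] terms) {
        this.terms = terms;
    }

    int[] getHashes() {
        int[] h = hashes;
        if (h == null) {
            h = new int[terms.length];
            for (int i = 0; i < h.length; i++) {
                h[i] = terms[i].hashCode(); // hash each term exactly once per build
            }
            hashes = h; // racy but idempotent: concurrent builders produce identical arrays
        }
        return h;
    }
}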

View File

@@ -23,7 +23,6 @@ import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.FieldDataType;
@@ -61,39 +60,23 @@ public class PagedBytesIndexFieldData extends AbstractBytesIndexFieldData<PagedB
final PagedBytes bytes = new PagedBytes(15);
int maxDoc = reader.maxDoc();
final int termCountHardLimit;
if (maxDoc == Integer.MAX_VALUE) {
termCountHardLimit = Integer.MAX_VALUE;
} else {
termCountHardLimit = maxDoc + 1;
}
// Try for coarse estimate for number of bits; this
// should be an underestimate most of the time, which
// is fine -- GrowableWriter will reallocate as needed
long numUniqueTerms = terms.size();
if (numUniqueTerms != -1L) {
if (numUniqueTerms > termCountHardLimit) {
// app is misusing the API (there is more than
// one term per doc); in this case we make best
// effort to load what we can (see LUCENE-2142)
numUniqueTerms = termCountHardLimit;
}
}
final MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();
termOrdToBytesOffset.add(0); // first ord is reserved for missing values
boolean preDefineBitsRequired = regex == null && frequency == null;
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
OrdinalsBuilder builder = new OrdinalsBuilder(terms, preDefineBitsRequired, reader.maxDoc(), acceptableOverheadRatio);
final long numTerms;
if (regex == null && frequency == null) {
numTerms = terms.size();
} else {
numTerms = -1;
}
final float acceptableTransientOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_transient_overhead_ratio", OrdinalsBuilder.DEFAULT_ACCEPTABLE_OVERHEAD_RATIO);
OrdinalsBuilder builder = new OrdinalsBuilder(numTerms, reader.maxDoc(), acceptableTransientOverheadRatio);
try {
// 0 is reserved for "unset"
bytes.copyUsingLengthPrefix(new BytesRef());
TermsEnum termsEnum = filter(terms, reader);
DocsEnum docsEnum = null;
for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
final int termOrd = builder.nextOrdinal();
final long termOrd = builder.nextOrdinal();
assert termOrd == termOrdToBytesOffset.size();
termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
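Each term takes the next ordinal from the builder while its start offset in the PagedBytes pool is appended to termOrdToBytesOffset; the offsets only ever grow, which is what makes MonotonicAppendingLongBuffer compact here. A condensed, standalone sketch of that bookkeeping, with made-up terms instead of a real TermsEnum:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;

public class OrdToOffsetSketch {
    public static void main(String[] args) throws Exception {
        PagedBytes bytes = new PagedBytes(15); // 32KB blocks, as in the loader above
        MonotonicAppendingLongBuffer termOrdToBytesOffset = new MonotonicAppendingLongBuffer();

        // ordinal 0 is reserved for "unset": an empty term stored at offset 0
        termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(new BytesRef()));

        for (String term : new String[] { "apple", "banana", "cherry" }) {
            // each term gets the next ordinal; offsets only ever grow
            termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(new BytesRef(term)));
        }

        // read a term back through its ordinal
        PagedBytes.Reader reader = bytes.freeze(true);
        BytesRef scratch = new BytesRef();
        reader.fill(scratch, termOrdToBytesOffset.get(2));
        System.out.println(scratch.utf8ToString()); // banana
    }
}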

View File

@@ -27,6 +27,8 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.cache.recycler.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.common.util.IntArray;
import org.elasticsearch.common.util.IntArrays;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
@@ -114,7 +116,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it... (can we get around this?)
int count = 0;
do {
count += agg.counts[agg.position];
count += agg.counts.get(agg.position);
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
@@ -144,12 +146,6 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
list[i] = (InternalStringTermsFacet.TermEntry) ordered.pop();
}
for (ReaderAggregator aggregator : aggregators) {
if (aggregator.counts.length > ordinalsCacheAbove) {
cacheRecycler.pushIntArray(aggregator.counts);
}
}
return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
}
@@ -160,7 +156,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it... (can we work around that?)
int count = 0;
do {
count += agg.counts[agg.position];
count += agg.counts.get(agg.position);
if (agg.nextPosition()) {
agg = queue.updateTop();
} else {
@@ -186,13 +182,6 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
}
}
for (ReaderAggregator aggregator : aggregators) {
if (aggregator.counts.length > ordinalsCacheAbove) {
cacheRecycler.pushIntArray(aggregator.counts);
}
}
return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing, total);
}
@@ -207,8 +196,8 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
missing += current.counts.get(0);
total += current.total - current.counts.get(0);
if (current.values.ordinals().getNumOrds() > 0) {
aggregators.add(current);
}
@@ -221,7 +210,7 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
@Override
public void collect(int doc) throws IOException {
Iter iter = ordinals.getIter(doc);
int ord = iter.next();
long ord = iter.next();
current.onOrdinal(doc, ord);
while ((ord = iter.next()) != 0) {
current.onOrdinal(doc, ord);
@@ -231,8 +220,8 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
@Override
public void postCollection() {
if (current != null) {
missing += current.counts[0];
total += current.total - current.counts[0];
missing += current.counts.get(0);
total += current.total - current.counts.get(0);
// if we have values for this one, add it
if (current.values.ordinals().getNumOrds() > 0) {
aggregators.add(current);
@@ -247,26 +236,21 @@ public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {
public static final class ReaderAggregator {
final BytesValues.WithOrdinals values;
final int[] counts;
final IntArray counts;
int position = 0;
long position = 0;
BytesRef current;
int total;
private final int maxOrd;
private final long maxOrd;
public ReaderAggregator(BytesValues.WithOrdinals values, int ordinalsCacheLimit, CacheRecycler cacheRecycler) {
this.values = values;
this.maxOrd = values.ordinals().getMaxOrd();
if (maxOrd > ordinalsCacheLimit) {
this.counts = cacheRecycler.popIntArray(maxOrd);
} else {
this.counts = new int[maxOrd];
}
this.counts = IntArrays.allocate(maxOrd);
}
final void onOrdinal(int docId, int ordinal) {
counts[ordinal]++;
final void onOrdinal(int docId, long ordinal) {
counts.increment(ordinal, 1);
total++;
}
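Since ordinals are now long-valued, the per-segment counters come from IntArrays.allocate(maxOrd) instead of a plain int[]. The tallying logic itself is unchanged, as this standalone sketch shows with a plain array and made-up per-document ordinals:

public class FacetTallySketch {
    public static void main(String[] args) {
        // ordinals of each document's values; an empty row means "no value" (ordinal 0)
        long[][] ordsPerDoc = { { 2, 4 }, { 1 }, {}, { 1, 3 } };
        long maxOrd = 5; // slots 0..4, slot 0 counting value-less documents

        int[] counts = new int[(int) maxOrd]; // the real code uses a long-indexable IntArray
        int total = 0;
        for (long[] ords : ordsPerDoc) {
            if (ords.length == 0) {
                counts[0]++; // no value: tallied as missing
                continue;
            }
            for (long ord : ords) {
                counts[(int) ord]++;
                total++;
            }
        }
        System.out.println("missing=" + counts[0] + " total=" + total);
    }
}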

View File

@@ -82,7 +82,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
Docs ordinals = bytesValues.ordinals();
assertThat(2, equalTo(ordinals.getNumOrds()));
assertThat(2L, equalTo(ordinals.getNumOrds()));
assertThat(1000, equalTo(ordinals.getNumDocs()));
assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10"));
assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100"));
@@ -95,7 +95,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
Docs ordinals = bytesValues.ordinals();
assertThat(1, equalTo(ordinals.getNumOrds()));
assertThat(1L, equalTo(ordinals.getNumOrds()));
assertThat(1000, equalTo(ordinals.getNumDocs()));
assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("5"));
}
@@ -108,7 +108,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
Docs ordinals = bytesValues.ordinals();
assertThat(2, equalTo(ordinals.getNumOrds()));
assertThat(2L, equalTo(ordinals.getNumOrds()));
assertThat(1000, equalTo(ordinals.getNumDocs()));
assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10"));
assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100"));
@@ -122,7 +122,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
Docs ordinals = bytesValues.ordinals();
assertThat(2, equalTo(ordinals.getNumOrds()));
assertThat(2L, equalTo(ordinals.getNumOrds()));
assertThat(1000, equalTo(ordinals.getNumDocs()));
assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10"));
assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("100"));
@@ -139,7 +139,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
Docs ordinals = bytesValues.ordinals();
assertThat(1, equalTo(ordinals.getNumOrds()));
assertThat(1L, equalTo(ordinals.getNumOrds()));
assertThat(1000, equalTo(ordinals.getNumDocs()));
assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("100"));
}
@@ -184,7 +184,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
Docs ordinals = bytesValues.ordinals();
assertThat(1, equalTo(ordinals.getNumOrds()));
assertThat(1L, equalTo(ordinals.getNumOrds()));
assertThat(1000, equalTo(ordinals.getNumDocs()));
assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("5"));
}
@@ -196,7 +196,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> loadDirect = (WithOrdinals<Strings>) fieldData.loadDirect(context);
BytesValues.WithOrdinals bytesValues = loadDirect.getBytesValues();
Docs ordinals = bytesValues.ordinals();
assertThat(2, equalTo(ordinals.getNumOrds()));
assertThat(2L, equalTo(ordinals.getNumOrds()));
assertThat(1000, equalTo(ordinals.getNumDocs()));
assertThat(bytesValues.getValueByOrd(1).utf8ToString(), equalTo("10"));
assertThat(bytesValues.getValueByOrd(2).utf8ToString(), equalTo("5"));

View File

@@ -1,35 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.index.fielddata.ordinals;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
/**
*/
public class FlatMultiOrdinalsTests extends MultiOrdinalsTests {
@Override
protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) {
settings.put("multi_ordinals", "flat");
return builder.build(settings.build());
}
}

View File

@@ -19,8 +19,9 @@
package org.elasticsearch.test.unit.index.fielddata.ordinals;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LongsRef;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.ordinals.MultiOrdinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.testng.annotations.Test;
@@ -30,18 +31,20 @@ import java.util.*;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
/**
*/
public abstract class MultiOrdinalsTests {
public class MultiOrdinalsTests {
protected final Ordinals creationMultiOrdinals(OrdinalsBuilder builder) {
return this.creationMultiOrdinals(builder, ImmutableSettings.builder());
}
protected abstract Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings);
protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) {
return builder.build(settings.build());
}
@Test
public void testRandomValues() throws IOException {
@@ -74,7 +77,7 @@ public abstract class MultiOrdinalsTests {
return 1;
}
});
int lastOrd = -1;
long lastOrd = -1;
for (OrdAndId ordAndId : ordsAndIds) {
if (lastOrd != ordAndId.ord) {
lastOrd = ordAndId.ord;
@@ -105,27 +108,27 @@ public abstract class MultiOrdinalsTests {
Ordinals ords = creationMultiOrdinals(builder);
Ordinals.Docs docs = ords.ordinals();
int docId = ordsAndIds.get(0).id;
List<Integer> docOrds = new ArrayList<Integer>();
List<Long> docOrds = new ArrayList<Long>();
for (OrdAndId ordAndId : ordsAndIds) {
if (docId == ordAndId.id) {
docOrds.add(ordAndId.ord);
} else {
if (!docOrds.isEmpty()) {
assertThat(docs.getOrd(docId), equalTo(docOrds.get(0)));
IntsRef ref = docs.getOrds(docId);
LongsRef ref = docs.getOrds(docId);
assertThat(ref.offset, equalTo(0));
for (int i = ref.offset; i < ref.length; i++) {
assertThat(ref.ints[i], equalTo(docOrds.get(i)));
assertThat(ref.longs[i], equalTo(docOrds.get(i)));
}
final int[] array = new int[docOrds.size()];
final long[] array = new long[docOrds.size()];
for (int i = 0; i < array.length; i++) {
array[i] = docOrds.get(i);
}
assertIter(docs.getIter(docId), array);
}
for (int i = docId + 1; i < ordAndId.id; i++) {
assertThat(docs.getOrd(i), equalTo(0));
assertThat(docs.getOrd(i), equalTo(0L));
}
docId = ordAndId.id;
docOrds.clear();
@@ -137,10 +140,10 @@ public abstract class MultiOrdinalsTests {
}
public static class OrdAndId {
final int ord;
final long ord;
final int id;
public OrdAndId(int ord, int id) {
public OrdAndId(long ord, int id) {
this.ord = ord;
this.id = id;
}
@@ -150,7 +153,7 @@ public abstract class MultiOrdinalsTests {
final int prime = 31;
int result = 1;
result = prime * result + id;
result = prime * result + ord;
result = prime * result + (int) ord;
return result;
}
@@ -174,7 +177,7 @@ public abstract class MultiOrdinalsTests {
@Test
public void testOrdinals() throws Exception {
int maxDoc = 7;
int maxOrds = 32;
long maxOrds = 32;
OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc);
builder.nextOrdinal(); // 1
builder.addDoc(1).addDoc(4).addDoc(5).addDoc(6);
@@ -186,97 +189,99 @@ public abstract class MultiOrdinalsTests {
builder.addDoc(0).addDoc(4).addDoc(5).addDoc(6);
builder.nextOrdinal(); // 5
builder.addDoc(4).addDoc(5).addDoc(6);
int ord = builder.nextOrdinal(); // 6
long ord = builder.nextOrdinal(); // 6
builder.addDoc(4).addDoc(5).addDoc(6);
for (int i = ord; i < maxOrds; i++) {
for (long i = ord; i < maxOrds; i++) {
builder.nextOrdinal();
builder.addDoc(5).addDoc(6);
}
long[][] ordinalPlan = new long[][] {
{2, 4},
{1},
{3},
{},
{1, 3, 4, 5, 6},
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}
};
Ordinals ordinals = creationMultiOrdinals(builder);
Ordinals.Docs docs = ordinals.ordinals();
assertThat(docs.getNumDocs(), equalTo(maxDoc));
assertThat(docs.getNumOrds(), equalTo(maxOrds));
assertThat(docs.getMaxOrd(), equalTo(maxOrds + 1)); // Includes null ord
assertThat(docs.isMultiValued(), equalTo(true));
assertThat(ordinals.getMemorySizeInBytes(), greaterThan(0l));
// Document 1
assertThat(docs.getOrd(0), equalTo(2));
IntsRef ref = docs.getOrds(0);
assertThat(ref.offset, equalTo(0));
assertThat(ref.ints[0], equalTo(2));
assertThat(ref.ints[1], equalTo(4));
assertThat(ref.length, equalTo(2));
assertIter(docs.getIter(0), 2, 4);
// Document 2
assertThat(docs.getOrd(1), equalTo(1));
ref = docs.getOrds(1);
assertThat(ref.offset, equalTo(0));
assertThat(ref.ints[0], equalTo(1));
assertThat(ref.length, equalTo(1));
assertIter(docs.getIter(1), 1);
// Document 3
assertThat(docs.getOrd(2), equalTo(3));
ref = docs.getOrds(2);
assertThat(ref.offset, equalTo(0));
assertThat(ref.ints[0], equalTo(3));
assertThat(ref.length, equalTo(1));
assertIter(docs.getIter(2), 3);
// Document 4
assertThat(docs.getOrd(3), equalTo(0));
ref = docs.getOrds(3);
assertThat(ref.offset, equalTo(0));
assertThat(ref.length, equalTo(0));
assertIter(docs.getIter(3));
// Document 5
assertThat(docs.getOrd(4), equalTo(1));
ref = docs.getOrds(4);
assertThat(ref.offset, equalTo(0));
assertThat(ref.ints[0], equalTo(1));
assertThat(ref.ints[1], equalTo(3));
assertThat(ref.ints[2], equalTo(4));
assertThat(ref.ints[3], equalTo(5));
assertThat(ref.ints[4], equalTo(6));
assertThat(ref.length, equalTo(5));
assertIter(docs.getIter(4), 1, 3, 4, 5, 6);
// Document 6
assertThat(docs.getOrd(5), equalTo(1));
ref = docs.getOrds(5);
assertThat(ref.offset, equalTo(0));
int[] expectedOrds = new int[maxOrds];
for (int i = 0; i < maxOrds; i++) {
expectedOrds[i] = i + 1;
assertThat(ref.ints[i], equalTo(i + 1));
}
assertIter(docs.getIter(5), expectedOrds);
assertThat(ref.length, equalTo(maxOrds));
// Document 7
assertThat(docs.getOrd(6), equalTo(1));
ref = docs.getOrds(6);
assertThat(ref.offset, equalTo(0));
expectedOrds = new int[maxOrds];
for (int i = 0; i < maxOrds; i++) {
expectedOrds[i] = i + 1;
assertThat(ref.ints[i], equalTo(i + 1));
}
assertIter(docs.getIter(6), expectedOrds);
assertThat(ref.length, equalTo(maxOrds));
assertEquals(docs, ordinalPlan);
}
protected static void assertIter(Ordinals.Docs.Iter iter, int... expectedOrdinals) {
for (int expectedOrdinal : expectedOrdinals) {
protected static void assertIter(Ordinals.Docs.Iter iter, long... expectedOrdinals) {
for (long expectedOrdinal : expectedOrdinals) {
assertThat(iter.next(), equalTo(expectedOrdinal));
}
assertThat(iter.next(), equalTo(0)); // Last one should always be 0
assertThat(iter.next(), equalTo(0)); // Just checking it stays 0
assertThat(iter.next(), equalTo(0L)); // Last one should always be 0
assertThat(iter.next(), equalTo(0L)); // Just checking it stays 0
}
@Test
public void testMultiValuesDocsWithOverlappingStorageArrays() throws Exception {
int maxDoc = 7;
long maxOrds = 15;
OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc);
for (int i = 0; i < maxOrds; i++) {
builder.nextOrdinal();
if (i < 10) {
builder.addDoc(0);
}
builder.addDoc(1);
if (i == 0) {
builder.addDoc(2);
}
if (i < 5) {
builder.addDoc(3);
}
if (i < 6) {
builder.addDoc(4);
}
if (i == 1) {
builder.addDoc(5);
}
if (i < 10) {
builder.addDoc(6);
}
}
long[][] ordinalPlan = new long[][] {
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15},
{1},
{1,2,3,4,5},
{1,2,3,4,5,6},
{2},
{1,2,3,4,5,6,7,8,9,10}
};
Ordinals ordinals = new MultiOrdinals(builder);
Ordinals.Docs docs = ordinals.ordinals();
assertEquals(docs, ordinalPlan);
}
private void assertEquals(Ordinals.Docs docs, long[][] ordinalPlan) {
long numOrds = 0;
for (int doc = 0; doc < ordinalPlan.length; ++doc) {
if (ordinalPlan[doc].length > 0) {
numOrds = Math.max(numOrds, ordinalPlan[doc][ordinalPlan[doc].length - 1]);
}
}
assertThat(docs.getNumDocs(), equalTo(ordinalPlan.length));
assertThat(docs.getNumOrds(), equalTo(numOrds));
assertThat(docs.getMaxOrd(), equalTo(numOrds + 1)); // Includes null ord
assertThat(docs.isMultiValued(), equalTo(true));
for (int doc = 0; doc < ordinalPlan.length; ++doc) {
LongsRef ref = docs.getOrds(doc);
assertThat(ref.offset, equalTo(0));
long[] ords = ordinalPlan[doc];
assertThat(ref, equalTo(new LongsRef(ords, 0, ords.length)));
assertIter(docs.getIter(doc), ords);
}
}
}
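The assertions above rely on the iterator contract checked by assertIter: Iter.next() yields a document's ordinals and then returns 0 forever once exhausted, 0 never being a real ordinal. A sketch of a consumer loop written against that contract; the class is illustrative:

import org.elasticsearch.index.fielddata.ordinals.Ordinals;

final class OrdinalIterationSketch {
    /** Counts a document's values; 0 is the exhaustion sentinel, never a real ordinal. */
    static long countValues(Ordinals.Docs docs, int docId) {
        long count = 0;
        Ordinals.Docs.Iter iter = docs.getIter(docId);
        for (long ord = iter.next(); ord != 0L; ord = iter.next()) {
            count++; // every non-zero ordinal is one value of this document
        }
        return count;
    }
}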

View File

@@ -40,9 +40,9 @@ public class SingleOrdinalsTests {
public void testSvValues() throws IOException {
int numDocs = 1000000;
int numOrdinals = numDocs / 4;
Map<Integer, Integer> controlDocToOrdinal = new HashMap<Integer, Integer>();
Map<Integer, Long> controlDocToOrdinal = new HashMap<Integer, Long>();
OrdinalsBuilder builder = new OrdinalsBuilder(numDocs);
int ordinal = builder.nextOrdinal();
long ordinal = builder.nextOrdinal();
for (int doc = 0; doc < numDocs; doc++) {
if (doc % numOrdinals == 0) {
ordinal = builder.nextOrdinal();
@@ -56,7 +56,7 @@ public class SingleOrdinalsTests {
Ordinals.Docs docs = ords.ordinals();
assertThat(controlDocToOrdinal.size(), equalTo(docs.getNumDocs()));
for (Map.Entry<Integer, Integer> entry : controlDocToOrdinal.entrySet()) {
for (Map.Entry<Integer, Long> entry : controlDocToOrdinal.entrySet()) {
assertThat(entry.getValue(), equalTo(docs.getOrd(entry.getKey())));
}

View File

@@ -1,164 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.index.fielddata.ordinals;
import org.apache.lucene.util.IntsRef;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.ImmutableSettings.Builder;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.OrdinalsBuilder;
import org.elasticsearch.index.fielddata.ordinals.SparseMultiArrayOrdinals;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.testng.Assert.fail;
/**
*/
public class SparseMultiOrdinalsTests extends MultiOrdinalsTests {
@Override
protected Ordinals creationMultiOrdinals(OrdinalsBuilder builder, ImmutableSettings.Builder settings) {
settings.put("multi_ordinals", "sparse");
return builder.build(settings.build());
}
@Test
public void testMultiValuesSurpassOrdinalsLimit() throws Exception {
OrdinalsBuilder builder = new OrdinalsBuilder(2);
int maxOrds = 128;
for (int i = 0; i < maxOrds; i++) {
builder.nextOrdinal();
if (i == 2 || i == 4) {
builder.addDoc(0);
}
builder.addDoc(1);
}
try {
Builder builder2 = ImmutableSettings.builder();
builder2.put("multi_ordinals_max_docs", 64);
creationMultiOrdinals(builder, builder2);
fail("Exception should have been throwed");
} catch (ElasticSearchException e) {
}
}
@Test
public void testMultiValuesDocsWithOverlappingStorageArrays() throws Exception {
int maxDoc = 7;
int maxOrds = 15;
OrdinalsBuilder builder = new OrdinalsBuilder(maxDoc);
for (int i = 0; i < maxOrds; i++) {
builder.nextOrdinal();
if (i < 10) {
builder.addDoc(0);
}
builder.addDoc(1);
if (i == 0) {
builder.addDoc(2);
}
if (i < 5) {
builder.addDoc(3);
}
if (i < 6) {
builder.addDoc(4);
}
if (i == 1) {
builder.addDoc(5);
}
if (i < 10) {
builder.addDoc(6);
}
}
Ordinals ordinals = new SparseMultiArrayOrdinals(builder, 64);
Ordinals.Docs docs = ordinals.ordinals();
assertThat(docs.getNumDocs(), equalTo(maxDoc));
assertThat(docs.getNumOrds(), equalTo(maxOrds)); // Includes null ord
assertThat(docs.isMultiValued(), equalTo(true));
// Document 1
assertThat(docs.getOrd(0), equalTo(1));
IntsRef ref = docs.getOrds(0);
assertThat(ref.offset, equalTo(0));
for (int i = 0; i < 10; i++) {
assertThat(ref.ints[i], equalTo(i + 1));
}
assertThat(ref.length, equalTo(10));
// Document 2
assertThat(docs.getOrd(1), equalTo(1));
ref = docs.getOrds(1);
assertThat(ref.offset, equalTo(0));
for (int i = 0; i < 15; i++) {
assertThat(ref.ints[i], equalTo(i + 1));
}
assertThat(ref.length, equalTo(15));
// Document 3
assertThat(docs.getOrd(2), equalTo(1));
ref = docs.getOrds(2);
assertThat(ref.offset, equalTo(0));
assertThat(ref.ints[0], equalTo(1));
assertThat(ref.length, equalTo(1));
// Document 4
assertThat(docs.getOrd(3), equalTo(1));
ref = docs.getOrds(3);
assertThat(ref.offset, equalTo(0));
for (int i = 0; i < 5; i++) {
assertThat(ref.ints[i], equalTo(i + 1));
}
assertThat(ref.length, equalTo(5));
// Document 5
assertThat(docs.getOrd(4), equalTo(1));
ref = docs.getOrds(4);
assertThat(ref.offset, equalTo(0));
for (int i = 0; i < 6; i++) {
assertThat(ref.ints[i], equalTo(i + 1));
}
assertThat(ref.length, equalTo(6));
// Document 6
assertThat(docs.getOrd(5), equalTo(2));
ref = docs.getOrds(5);
assertThat(ref.offset, equalTo(0));
assertThat(ref.ints[0], equalTo(2));
assertThat(ref.length, equalTo(1));
// Document 7
assertThat(docs.getOrd(6), equalTo(1));
ref = docs.getOrds(6);
assertThat(ref.offset, equalTo(0));
for (int i = 0; i < 10; i++) {
assertThat(ref.ints[i], equalTo(i + 1));
}
assertThat(ref.length, equalTo(10));
}
}