Make the acceptable compression overhead used by MultiOrdinals configurable and default to PackedInts.FASTEST (causing it to byte align).
Closes #3623

Before this commit, this was the output of TermsFacetSearchBenchmark on my MacBookAir:

```
------------------ SUMMARY -------------------------------
name                 took    millis
terms_s              7.3s    36
terms_map_s          28.8s   144
terms_l              15.9s   79
terms_map_l          15.5s   77
terms_sm             1m      319
terms_map_sm         4.9m    1491
terms_lm             2.7m    825
terms_map_lm         2.7m    829
terms_stats_s_l      37.6s   188
terms_stats_s_lm     2.4m    722
terms_stats_sm_l     6.5m    1958
------------------ SUMMARY -------------------------------
```

After the change to FASTEST, we have:

```
------------------ SUMMARY -------------------------------
name                 took    millis
terms_s              6.9s    34
terms_map_s          28.8s   144
terms_l              17.4s   87
terms_map_l          17.6s   88
terms_sm             42s     210
terms_map_sm         4.2m    1287
terms_lm             2.3m    714
terms_map_lm         2.3m    716
terms_stats_s_l      37.5s   187
terms_stats_s_lm     1.6m    482
terms_stats_sm_l     6.1m    1852
------------------ SUMMARY -------------------------------
```
This commit is contained in:
parent
e33107d493
commit
c6ac5ac433
|
@ -0,0 +1,239 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
|
||||
|
||||
/**
|
||||
* Common functionality shared by {@link AppendingDeltaPackedLongBuffer} and {@link MonotonicAppendingLongBuffer}.
|
||||
*/
|
||||
abstract class XAbstractAppendingLongBuffer {
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.5.
|
||||
assert Lucene.VERSION == Version.LUCENE_44 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
|
||||
static final int MIN_PAGE_SIZE = 64;
|
||||
// More than 1M doesn't really makes sense with these appending buffers
|
||||
// since their goal is to try to have small numbers of bits per value
|
||||
static final int MAX_PAGE_SIZE = 1 << 20;
|
||||
|
||||
final int pageShift, pageMask;
|
||||
PackedInts.Reader[] values;
|
||||
private long valuesBytes;
|
||||
int valuesOff;
|
||||
long[] pending;
|
||||
int pendingOff;
|
||||
float acceptableOverheadRatio;
|
||||
|
||||
XAbstractAppendingLongBuffer(int initialBlockCount, int pageSize, float acceptableOverheadRatio) {
|
||||
values = new PackedInts.Reader[initialBlockCount];
|
||||
pending = new long[pageSize];
|
||||
pageShift = checkBlockSize(pageSize, MIN_PAGE_SIZE, MAX_PAGE_SIZE);
|
||||
pageMask = pageSize - 1;
|
||||
valuesOff = 0;
|
||||
pendingOff = 0;
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
}
|
||||
|
||||
final int pageSize() {
|
||||
return pageMask + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of values that have been added to the buffer.
|
||||
*/
|
||||
public final long size() {
|
||||
long size = pendingOff;
|
||||
if (valuesOff > 0) {
|
||||
size += values[valuesOff - 1].size();
|
||||
}
|
||||
if (valuesOff > 1) {
|
||||
size += (long) (valuesOff - 1) * pageSize();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a value to this buffer.
|
||||
*/
|
||||
public final void add(long l) {
|
||||
if (pending == null) {
|
||||
throw new IllegalStateException("This buffer is frozen");
|
||||
}
|
||||
if (pendingOff == pending.length) {
|
||||
// check size
|
||||
if (values.length == valuesOff) {
|
||||
final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
|
||||
grow(newLength);
|
||||
}
|
||||
packPendingValues();
|
||||
valuesBytes += values[valuesOff].ramBytesUsed();
|
||||
++valuesOff;
|
||||
// reset pending buffer
|
||||
pendingOff = 0;
|
||||
}
|
||||
pending[pendingOff++] = l;
|
||||
}
|
||||
|
||||
void grow(int newBlockCount) {
|
||||
values = Arrays.copyOf(values, newBlockCount);
|
||||
}
|
||||
|
||||
abstract void packPendingValues();
|
||||
|
||||
/**
|
||||
* Get a value from this buffer.
|
||||
*/
|
||||
public final long get(long index) {
|
||||
assert index >= 0 && index < size();
|
||||
final int block = (int) (index >> pageShift);
|
||||
final int element = (int) (index & pageMask);
|
||||
return get(block, element);
|
||||
}
|
||||
|
||||
/**
|
||||
* Bulk get: read at least one and at most <code>len</code> longs starting
|
||||
* from <code>index</code> into <code>arr[off:off+len]</code> and return
|
||||
* the actual number of values that have been read.
|
||||
*/
|
||||
public final int get(long index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < size();
|
||||
assert off + len <= arr.length;
|
||||
|
||||
int block = (int) (index >> pageShift);
|
||||
int element = (int) (index & pageMask);
|
||||
return get(block, element, arr, off, len);
|
||||
}
|
||||
|
||||
|
||||
abstract long get(int block, int element);
|
||||
|
||||
abstract int get(int block, int element, long[] arr, int off, int len);
|
||||
|
||||
|
||||
/**
|
||||
* Return an iterator over the values of this buffer.
|
||||
*/
|
||||
public Iterator iterator() {
|
||||
return new Iterator();
|
||||
}
|
||||
|
||||
final public class Iterator {
|
||||
|
||||
long[] currentValues;
|
||||
int vOff, pOff;
|
||||
int currentCount; // number of entries of the current page
|
||||
|
||||
Iterator() {
|
||||
vOff = pOff = 0;
|
||||
if (valuesOff == 0) {
|
||||
currentValues = pending;
|
||||
currentCount = pendingOff;
|
||||
} else {
|
||||
currentValues = new long[values[0].size()];
|
||||
fillValues();
|
||||
}
|
||||
}
|
||||
|
||||
void fillValues() {
|
||||
if (vOff == valuesOff) {
|
||||
currentValues = pending;
|
||||
currentCount = pendingOff;
|
||||
} else {
|
||||
currentCount = values[vOff].size();
|
||||
for (int k = 0; k < currentCount; ) {
|
||||
k += get(vOff, k, currentValues, k, currentCount - k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether or not there are remaining values.
|
||||
*/
|
||||
public final boolean hasNext() {
|
||||
return pOff < currentCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the next long in the buffer.
|
||||
*/
|
||||
public final long next() {
|
||||
assert hasNext();
|
||||
long result = currentValues[pOff++];
|
||||
if (pOff == currentCount) {
|
||||
vOff += 1;
|
||||
pOff = 0;
|
||||
if (vOff <= valuesOff) {
|
||||
fillValues();
|
||||
} else {
|
||||
currentCount = 0;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
long baseRamBytesUsed() {
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
|
||||
+ 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // the 2 arrays
|
||||
+ 2 * RamUsageEstimator.NUM_BYTES_INT // the 2 offsets
|
||||
+ 2 * RamUsageEstimator.NUM_BYTES_INT // pageShift, pageMask
|
||||
+ RamUsageEstimator.NUM_BYTES_FLOAT // acceptable overhead
|
||||
+ RamUsageEstimator.NUM_BYTES_LONG; // valuesBytes
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of bytes used by this instance.
|
||||
*/
|
||||
public long ramBytesUsed() {
|
||||
// TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
|
||||
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
|
||||
+ (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L)
|
||||
+ RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * values.length); // values
|
||||
|
||||
return bytesUsed + valuesBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pack all pending values in this buffer. Subsequent calls to {@link #add(long)} will fail.
|
||||
*/
|
||||
public void freeze() {
|
||||
if (pendingOff > 0) {
|
||||
if (values.length == valuesOff) {
|
||||
grow(valuesOff + 1); // don't oversize!
|
||||
}
|
||||
packPendingValues();
|
||||
valuesBytes += values[valuesOff].ramBytesUsed();
|
||||
++valuesOff;
|
||||
pendingOff = 0;
|
||||
}
|
||||
pending = null;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
/**
|
||||
* Utility class to buffer a list of signed longs in memory. This class only
|
||||
* supports appending and is optimized for non-negative numbers with a uniform distribution over a fixed (limited) range
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class XAppendingPackedLongBuffer extends XAbstractAppendingLongBuffer {
|
||||
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.5.
|
||||
assert Lucene.VERSION == Version.LUCENE_44 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@link XAppendingPackedLongBuffer}
|
||||
*
|
||||
* @param initialPageCount the initial number of pages
|
||||
* @param pageSize the size of a single page
|
||||
* @param acceptableOverheadRatio an acceptable overhead ratio per value
|
||||
*/
|
||||
public XAppendingPackedLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
|
||||
super(initialPageCount, pageSize, acceptableOverheadRatio);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an {@link XAppendingPackedLongBuffer} with initialPageCount=16,
|
||||
* pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
|
||||
*/
|
||||
public XAppendingPackedLongBuffer() {
|
||||
this(16, 1024, PackedInts.DEFAULT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an {@link XAppendingPackedLongBuffer} with initialPageCount=16,
|
||||
* pageSize=1024
|
||||
*/
|
||||
public XAppendingPackedLongBuffer(float acceptableOverheadRatio) {
|
||||
this(16, 1024, acceptableOverheadRatio);
|
||||
}
|
||||
|
||||
@Override
|
||||
long get(int block, int element) {
|
||||
if (block == valuesOff) {
|
||||
return pending[element];
|
||||
} else {
|
||||
return values[block].get(element);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
int get(int block, int element, long[] arr, int off, int len) {
|
||||
if (block == valuesOff) {
|
||||
int sysCopyToRead = Math.min(len, pendingOff - element);
|
||||
System.arraycopy(pending, element, arr, off, sysCopyToRead);
|
||||
return sysCopyToRead;
|
||||
} else {
|
||||
/* packed block */
|
||||
return values[block].get(element, arr, off, len);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
void packPendingValues() {
|
||||
// compute max delta
|
||||
long minValue = pending[0];
|
||||
long maxValue = pending[0];
|
||||
for (int i = 1; i < pendingOff; ++i) {
|
||||
minValue = Math.min(minValue, pending[i]);
|
||||
maxValue = Math.max(maxValue, pending[i]);
|
||||
}
|
||||
|
||||
|
||||
// build a new packed reader
|
||||
final int bitsRequired = minValue < 0 ? 64 : PackedInts.bitsRequired(maxValue);
|
||||
final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
|
||||
for (int i = 0; i < pendingOff; ) {
|
||||
i += mutable.set(i, pending, i, pendingOff - i);
|
||||
}
|
||||
values[valuesOff] = mutable;
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,166 @@
|
|||
package org.apache.lucene.util.packed;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Utility class to buffer signed longs in memory, which is optimized for the
|
||||
* case where the sequence is monotonic, although it can encode any sequence of
|
||||
* arbitrary longs. It only supports appending.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class XMonotonicAppendingLongBuffer extends XAbstractAppendingLongBuffer {
|
||||
static {
|
||||
// LUCENE MONITOR: this should be in Lucene 4.5.
|
||||
assert Lucene.VERSION == Version.LUCENE_44 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
|
||||
}
|
||||
|
||||
static long zigZagDecode(long n) {
|
||||
return ((n >>> 1) ^ -(n & 1));
|
||||
}
|
||||
|
||||
static long zigZagEncode(long n) {
|
||||
return (n >> 63) ^ (n << 1);
|
||||
}
|
||||
|
||||
float[] averages;
|
||||
long[] minValues;
|
||||
|
||||
/**
|
||||
* @param initialPageCount the initial number of pages
|
||||
* @param pageSize the size of a single page
|
||||
* @param acceptableOverheadRatio an acceptable overhead ratio per value
|
||||
*/
|
||||
public XMonotonicAppendingLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
|
||||
super(initialPageCount, pageSize, acceptableOverheadRatio);
|
||||
averages = new float[values.length];
|
||||
minValues = new long[values.length];
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an {@link MonotonicAppendingLongBuffer} with initialPageCount=16,
|
||||
* pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
|
||||
*/
|
||||
public XMonotonicAppendingLongBuffer() {
|
||||
this(16, 1024, PackedInts.DEFAULT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an {@link MonotonicAppendingLongBuffer} with initialPageCount=16,
|
||||
* pageSize=1024
|
||||
*/
|
||||
public XMonotonicAppendingLongBuffer(float acceptableOverheadRatio) {
|
||||
this(16, 1024, acceptableOverheadRatio);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
long get(int block, int element) {
|
||||
if (block == valuesOff) {
|
||||
return pending[element];
|
||||
} else {
|
||||
final long base = minValues[block] + (long) (averages[block] * (long) element);
|
||||
if (values[block] == null) {
|
||||
return base;
|
||||
} else {
|
||||
return base + zigZagDecode(values[block].get(element));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
int get(int block, int element, long[] arr, int off, int len) {
|
||||
if (block == valuesOff) {
|
||||
int sysCopyToRead = Math.min(len, pendingOff - element);
|
||||
System.arraycopy(pending, element, arr, off, sysCopyToRead);
|
||||
return sysCopyToRead;
|
||||
} else {
|
||||
if (values[block] == null) {
|
||||
int toFill = Math.min(len, pending.length - element);
|
||||
for (int r = 0; r < toFill; r++, off++, element++) {
|
||||
arr[off] = minValues[block] + (long) (averages[block] * (long) element);
|
||||
}
|
||||
return toFill;
|
||||
} else {
|
||||
|
||||
/* packed block */
|
||||
int read = values[block].get(element, arr, off, len);
|
||||
for (int r = 0; r < read; r++, off++, element++) {
|
||||
arr[off] = minValues[block] + (long) (averages[block] * (long) element) + zigZagDecode(arr[off]);
|
||||
}
|
||||
return read;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
void grow(int newBlockCount) {
|
||||
super.grow(newBlockCount);
|
||||
this.averages = Arrays.copyOf(averages, newBlockCount);
|
||||
this.minValues = Arrays.copyOf(minValues, newBlockCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
void packPendingValues() {
|
||||
assert pendingOff > 0;
|
||||
minValues[valuesOff] = pending[0];
|
||||
averages[valuesOff] = pendingOff == 1 ? 0 : (float) (pending[pendingOff - 1] - pending[0]) / (pendingOff - 1);
|
||||
|
||||
for (int i = 0; i < pendingOff; ++i) {
|
||||
pending[i] = zigZagEncode(pending[i] - minValues[valuesOff] - (long) (averages[valuesOff] * (long) i));
|
||||
}
|
||||
long maxDelta = 0;
|
||||
for (int i = 0; i < pendingOff; ++i) {
|
||||
if (pending[i] < 0) {
|
||||
maxDelta = -1;
|
||||
break;
|
||||
} else {
|
||||
maxDelta = Math.max(maxDelta, pending[i]);
|
||||
}
|
||||
}
|
||||
if (maxDelta == 0) {
|
||||
values[valuesOff] = new PackedInts.NullReader(pendingOff);
|
||||
} else {
|
||||
final int bitsRequired = maxDelta < 0 ? 64 : PackedInts.bitsRequired(maxDelta);
|
||||
final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
|
||||
for (int i = 0; i < pendingOff; ) {
|
||||
i += mutable.set(i, pending, i, pendingOff - i);
|
||||
}
|
||||
values[valuesOff] = mutable;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
long baseRamBytesUsed() {
|
||||
return super.baseRamBytesUsed()
|
||||
+ 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // 2 additional arrays
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return super.ramBytesUsed()
|
||||
+ RamUsageEstimator.sizeOf(averages) + RamUsageEstimator.sizeOf(minValues);
|
||||
}
|
||||
|
||||
}
|
|
@ -22,9 +22,9 @@ package org.elasticsearch.index.fielddata.ordinals;
|
|||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.packed.AppendingLongBuffer;
|
||||
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.XAppendingPackedLongBuffer;
|
||||
import org.apache.lucene.util.packed.XMonotonicAppendingLongBuffer;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter;
|
||||
|
||||
/**
|
||||
|
@ -38,13 +38,16 @@ public class MultiOrdinals implements Ordinals {
|
|||
/**
|
||||
* Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%.
|
||||
*/
|
||||
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds) {
|
||||
final int bitsPerOrd = PackedInts.bitsRequired(numOrds);
|
||||
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds, float acceptableOverheadRatio) {
|
||||
int bitsPerOrd = PackedInts.bitsRequired(numOrds);
|
||||
bitsPerOrd = PackedInts.fastestFormatAndBits(numDocsWithValue, bitsPerOrd, acceptableOverheadRatio).bitsPerValue;
|
||||
// Compute the worst-case number of bits per value for offsets in the worst case, eg. if no docs have a value at the
|
||||
// beginning of the block and all docs have one at the end of the block
|
||||
final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
|
||||
final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
|
||||
final int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
|
||||
int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
|
||||
bitsPerOffset = PackedInts.fastestFormatAndBits(maxDoc, bitsPerOffset, acceptableOverheadRatio).bitsPerValue;
|
||||
|
||||
final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
|
||||
final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd;
|
||||
return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes;
|
||||
|
@ -52,14 +55,14 @@ public class MultiOrdinals implements Ordinals {
|
|||
|
||||
private final boolean multiValued;
|
||||
private final long numOrds;
|
||||
private final MonotonicAppendingLongBuffer endOffsets;
|
||||
private final AppendingLongBuffer ords;
|
||||
private final XMonotonicAppendingLongBuffer endOffsets;
|
||||
private final XAppendingPackedLongBuffer ords;
|
||||
|
||||
public MultiOrdinals(OrdinalsBuilder builder) {
|
||||
public MultiOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) {
|
||||
multiValued = builder.getNumMultiValuesDocs() > 0;
|
||||
numOrds = builder.getNumOrds();
|
||||
endOffsets = new MonotonicAppendingLongBuffer();
|
||||
ords = new AppendingLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE);
|
||||
endOffsets = new XMonotonicAppendingLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
|
||||
ords = new XAppendingPackedLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
|
||||
long lastEndOffset = 0;
|
||||
for (int i = 0; i < builder.maxDoc(); ++i) {
|
||||
final LongsRef docOrds = builder.docOrds(i);
|
||||
|
@ -117,8 +120,8 @@ public class MultiOrdinals implements Ordinals {
|
|||
static class MultiDocs implements Ordinals.Docs {
|
||||
|
||||
private final MultiOrdinals ordinals;
|
||||
private final MonotonicAppendingLongBuffer endOffsets;
|
||||
private final AppendingLongBuffer ords;
|
||||
private final XMonotonicAppendingLongBuffer endOffsets;
|
||||
private final XAppendingPackedLongBuffer ords;
|
||||
private final LongsRef longsScratch;
|
||||
private final MultiIter iter;
|
||||
|
||||
|
@ -195,10 +198,10 @@ public class MultiOrdinals implements Ordinals {
|
|||
|
||||
static class MultiIter implements Iter {
|
||||
|
||||
final AppendingLongBuffer ordinals;
|
||||
final XAppendingPackedLongBuffer ordinals;
|
||||
long offset, endOffset;
|
||||
|
||||
MultiIter(AppendingLongBuffer ordinals) {
|
||||
MultiIter(XAppendingPackedLongBuffer ordinals) {
|
||||
this.ordinals = ordinals;
|
||||
}
|
||||
|
||||
|
|
|
@ -40,11 +40,14 @@ import java.util.Comparator;
|
|||
*/
|
||||
public final class OrdinalsBuilder implements Closeable {
|
||||
|
||||
/** Default acceptable overhead ratio. {@link OrdinalsBuilder} memory usage is mostly transient so it is likely a better trade-off to
|
||||
* trade memory for speed in order to resize less often. */
|
||||
/**
|
||||
* Default acceptable overhead ratio. {@link OrdinalsBuilder} memory usage is mostly transient so it is likely a better trade-off to
|
||||
* trade memory for speed in order to resize less often.
|
||||
*/
|
||||
public static final float DEFAULT_ACCEPTABLE_OVERHEAD_RATIO = PackedInts.FAST;
|
||||
|
||||
/** The following structure is used to store ordinals. The idea is to store ords on levels of increasing sizes. Level 0 stores
|
||||
/**
|
||||
* The following structure is used to store ordinals. The idea is to store ords on levels of increasing sizes. Level 0 stores
|
||||
* 1 value and 1 pointer to level 1. Level 1 stores 2 values and 1 pointer to level 2, ..., Level n stores 2**n values and
|
||||
* 1 pointer to level n+1. If at some point an ordinal or a pointer has 0 as a value, this means that there are no remaining
|
||||
* values. On the first level, ordinals.get(docId) is the first ordinal for docId or 0 if the document has no ordinals. On
|
||||
|
@ -67,7 +70,7 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
* with document 2: it has 2 more ordinals on level 1: 3 and 4 and its next level index is 1 meaning that there are remaining
|
||||
* ordinals on the next level. On level 2 at index 1, we can read [5 0 0 0] meaning that 5 is an ordinal as well, but the
|
||||
* fact that it is followed by zeros means that there are no more ordinals. In the end, document 2 has 2, 3, 4 and 5 as ordinals.
|
||||
*
|
||||
* <p/>
|
||||
* In addition to these structures, there is another array which stores the current position (level + slice + offset in the slice)
|
||||
* in order to be able to append data in constant time.
|
||||
*/
|
||||
|
@ -75,7 +78,9 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
|
||||
private static final int PAGE_SIZE = 1 << 12;
|
||||
|
||||
/** Number of slots at <code>level</code> */
|
||||
/**
|
||||
* Number of slots at <code>level</code>
|
||||
*/
|
||||
private static int numSlots(int level) {
|
||||
return 1 << level;
|
||||
}
|
||||
|
@ -84,34 +89,46 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
return numSlots(level) - 1;
|
||||
}
|
||||
|
||||
/** Encode the position for the given level and offset. The idea is to encode the level using unary coding in the lower bits and
|
||||
* then the offset in the higher bits. */
|
||||
/**
|
||||
* Encode the position for the given level and offset. The idea is to encode the level using unary coding in the lower bits and
|
||||
* then the offset in the higher bits.
|
||||
*/
|
||||
private static long position(int level, long offset) {
|
||||
assert level >= 1;
|
||||
return (1 << (level - 1)) | (offset << level);
|
||||
}
|
||||
|
||||
/** Decode the level from an encoded position. */
|
||||
/**
|
||||
* Decode the level from an encoded position.
|
||||
*/
|
||||
private static int level(long position) {
|
||||
return 1 + Long.numberOfTrailingZeros(position);
|
||||
}
|
||||
|
||||
/** Decode the offset from the position. */
|
||||
/**
|
||||
* Decode the offset from the position.
|
||||
*/
|
||||
private static long offset(long position, int level) {
|
||||
return position >>> level;
|
||||
}
|
||||
|
||||
/** Get the ID of the slice given an offset. */
|
||||
/**
|
||||
* Get the ID of the slice given an offset.
|
||||
*/
|
||||
private static long sliceID(int level, long offset) {
|
||||
return offset >>> level;
|
||||
}
|
||||
|
||||
/** Compute the first offset of the given slice. */
|
||||
/**
|
||||
* Compute the first offset of the given slice.
|
||||
*/
|
||||
private static long startOffset(int level, long slice) {
|
||||
return slice << level;
|
||||
}
|
||||
|
||||
/** Compute the number of ordinals stored for a value given its current position. */
|
||||
/**
|
||||
* Compute the number of ordinals stored for a value given its current position.
|
||||
*/
|
||||
private static int numOrdinals(int level, long offset) {
|
||||
return (1 << level) + (int) (offset & slotsMask(level));
|
||||
}
|
||||
|
@ -141,7 +158,9 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
Arrays.fill(sizes, 1); // reserve the 1st slice on every level
|
||||
}
|
||||
|
||||
/** Allocate a new slice and return its ID. */
|
||||
/**
|
||||
* Allocate a new slice and return its ID.
|
||||
*/
|
||||
private long newSlice(int level) {
|
||||
final long newSlice = sizes[level]++;
|
||||
// Lazily allocate ordinals
|
||||
|
@ -275,7 +294,9 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
return spare;
|
||||
}
|
||||
|
||||
/** Return a {@link PackedInts.Reader} instance mapping every doc ID to its first ordinal if it exists and 0 otherwise. */
|
||||
/**
|
||||
* Return a {@link PackedInts.Reader} instance mapping every doc ID to its first ordinal if it exists and 0 otherwise.
|
||||
*/
|
||||
public PackedInts.Reader getFirstOrdinals() {
|
||||
return ordinals.firstOrdinals;
|
||||
}
|
||||
|
@ -373,10 +394,10 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
* Builds an {@link Ordinals} instance from the builders current state.
|
||||
*/
|
||||
public Ordinals build(Settings settings) {
|
||||
final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
|
||||
if (numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getNumOrds())) {
|
||||
final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.FASTEST);
|
||||
if (numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getNumOrds(), acceptableOverheadRatio)) {
|
||||
// MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields
|
||||
return new MultiOrdinals(this);
|
||||
return new MultiOrdinals(this, acceptableOverheadRatio);
|
||||
} else {
|
||||
return new SinglePackedOrdinals(this, acceptableOverheadRatio);
|
||||
}
|
||||
|
@ -391,6 +412,7 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
|
||||
/**
|
||||
* A {@link TermsEnum} that iterates only full precision prefix coded 64 bit values.
|
||||
*
|
||||
* @see #buildFromTerms(TermsEnum, Bits)
|
||||
*/
|
||||
public static TermsEnum wrapNumeric64Bit(TermsEnum termsEnum) {
|
||||
|
@ -405,6 +427,7 @@ public final class OrdinalsBuilder implements Closeable {
|
|||
|
||||
/**
|
||||
* A {@link TermsEnum} that iterates only full precision prefix coded 32 bit values.
|
||||
*
|
||||
* @see #buildFromTerms(TermsEnum, Bits)
|
||||
*/
|
||||
public static TermsEnum wrapNumeric32Bit(TermsEnum termsEnum) {
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.elasticsearch.test.unit.index.fielddata.ordinals;
|
||||
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.index.fielddata.ordinals.MultiOrdinals;
|
||||
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
|
||||
|
@ -160,17 +161,22 @@ public class MultiOrdinalsTests extends ElasticsearchTestCase {
|
|||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj)
|
||||
if (this == obj) {
|
||||
return true;
|
||||
if (obj == null)
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
}
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
OrdAndId other = (OrdAndId) obj;
|
||||
if (id != other.id)
|
||||
if (id != other.id) {
|
||||
return false;
|
||||
if (ord != other.ord)
|
||||
}
|
||||
if (ord != other.ord) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -260,7 +266,7 @@ public class MultiOrdinalsTests extends ElasticsearchTestCase {
|
|||
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
|
||||
};
|
||||
|
||||
Ordinals ordinals = new MultiOrdinals(builder);
|
||||
Ordinals ordinals = new MultiOrdinals(builder, PackedInts.FASTEST);
|
||||
Ordinals.Docs docs = ordinals.ordinals();
|
||||
assertEquals(docs, ordinalPlan);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue