Make the acceptable compression overhead ratio used by MultiOrdinals configurable and default it to PackedInts.FASTEST (which causes packed values to be byte-aligned).

Closes #3623
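
For context, here is a minimal sketch (using the Lucene 4.x `PackedInts` API this commit already relies on; the value count and bit width are made up) of how the acceptable overhead ratio influences the packed format that gets picked. With `FASTEST`, the chosen `bitsPerValue` is typically rounded up to a byte-aligned width, which is the byte alignment mentioned above:

```java
import org.apache.lucene.util.packed.PackedInts;

public class OverheadRatioSketch {
    public static void main(String[] args) {
        final int valueCount = 1 << 20; // hypothetical number of values to store
        final int bitsRequired = 11;    // hypothetical minimum bits per value

        // COMPACT/DEFAULT stay close to the minimal width; FASTEST may waste bits
        // per value in exchange for faster, aligned reads.
        for (float ratio : new float[]{PackedInts.COMPACT, PackedInts.DEFAULT, PackedInts.FASTEST}) {
            PackedInts.FormatAndBits fab = PackedInts.fastestFormatAndBits(valueCount, bitsRequired, ratio);
            System.out.println("ratio=" + ratio + " -> " + fab.format + " @ " + fab.bitsPerValue + " bits/value");
        }
    }
}
```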

Before this commit, this was the output of TermsFacetSearchBenchmark on my MacBook Air:

```
------------------ SUMMARY -------------------------------
                     name      took    millis
                  terms_s      7.3s        36
              terms_map_s     28.8s       144
                  terms_l     15.9s        79
              terms_map_l     15.5s        77
                 terms_sm        1m       319
             terms_map_sm      4.9m      1491
                 terms_lm      2.7m       825
             terms_map_lm      2.7m       829
          terms_stats_s_l     37.6s       188
         terms_stats_s_lm      2.4m       722
         terms_stats_sm_l      6.5m      1958
------------------ SUMMARY -------------------------------
```

After the change to FASTEST, we have:

```
------------------ SUMMARY -------------------------------
                     name      took    millis
                  terms_s      6.9s        34
              terms_map_s     28.8s       144
                  terms_l     17.4s        87
              terms_map_l     17.6s        88
                 terms_sm       42s       210
             terms_map_sm      4.2m      1287
                 terms_lm      2.3m       714
             terms_map_lm      2.3m       716
          terms_stats_s_l     37.5s       187
         terms_stats_s_lm      1.6m       482
         terms_stats_sm_l      6.1m      1852
------------------ SUMMARY -------------------------------
```
Boaz Leskes 2013-09-05 15:05:37 +02:00
parent e33107d493
commit c6ac5ac433
6 changed files with 603 additions and 59 deletions

View File: org/apache/lucene/util/packed/XAbstractAppendingLongBuffer.java

@ -0,0 +1,239 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import java.util.Arrays;
import static org.apache.lucene.util.packed.PackedInts.checkBlockSize;
/**
* Common functionality shared by {@link XAppendingPackedLongBuffer} and {@link XMonotonicAppendingLongBuffer}.
*/
abstract class XAbstractAppendingLongBuffer {
static {
// LUCENE MONITOR: this should be in Lucene 4.5.
assert Lucene.VERSION == Version.LUCENE_44 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
}
static final int MIN_PAGE_SIZE = 64;
// More than 1M doesn't really make sense with these appending buffers
// since their goal is to try to have small numbers of bits per value
static final int MAX_PAGE_SIZE = 1 << 20;
final int pageShift, pageMask;
PackedInts.Reader[] values;
private long valuesBytes;
int valuesOff;
long[] pending;
int pendingOff;
float acceptableOverheadRatio;
XAbstractAppendingLongBuffer(int initialBlockCount, int pageSize, float acceptableOverheadRatio) {
values = new PackedInts.Reader[initialBlockCount];
pending = new long[pageSize];
pageShift = checkBlockSize(pageSize, MIN_PAGE_SIZE, MAX_PAGE_SIZE);
pageMask = pageSize - 1;
valuesOff = 0;
pendingOff = 0;
this.acceptableOverheadRatio = acceptableOverheadRatio;
}
final int pageSize() {
return pageMask + 1;
}
/**
* Get the number of values that have been added to the buffer.
*/
public final long size() {
long size = pendingOff;
if (valuesOff > 0) {
size += values[valuesOff - 1].size();
}
if (valuesOff > 1) {
size += (long) (valuesOff - 1) * pageSize();
}
return size;
}
/**
* Append a value to this buffer.
*/
public final void add(long l) {
if (pending == null) {
throw new IllegalStateException("This buffer is frozen");
}
if (pendingOff == pending.length) {
// check size
if (values.length == valuesOff) {
final int newLength = ArrayUtil.oversize(valuesOff + 1, 8);
grow(newLength);
}
packPendingValues();
valuesBytes += values[valuesOff].ramBytesUsed();
++valuesOff;
// reset pending buffer
pendingOff = 0;
}
pending[pendingOff++] = l;
}
void grow(int newBlockCount) {
values = Arrays.copyOf(values, newBlockCount);
}
abstract void packPendingValues();
/**
* Get a value from this buffer.
*/
public final long get(long index) {
assert index >= 0 && index < size();
final int block = (int) (index >> pageShift);
final int element = (int) (index & pageMask);
return get(block, element);
}
/**
* Bulk get: read at least one and at most <code>len</code> longs starting
* from <code>index</code> into <code>arr[off:off+len]</code> and return
* the actual number of values that have been read.
*/
public final int get(long index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < size();
assert off + len <= arr.length;
int block = (int) (index >> pageShift);
int element = (int) (index & pageMask);
return get(block, element, arr, off, len);
}
abstract long get(int block, int element);
abstract int get(int block, int element, long[] arr, int off, int len);
/**
* Return an iterator over the values of this buffer.
*/
public Iterator iterator() {
return new Iterator();
}
final public class Iterator {
long[] currentValues;
int vOff, pOff;
int currentCount; // number of entries of the current page
Iterator() {
vOff = pOff = 0;
if (valuesOff == 0) {
currentValues = pending;
currentCount = pendingOff;
} else {
currentValues = new long[values[0].size()];
fillValues();
}
}
void fillValues() {
if (vOff == valuesOff) {
currentValues = pending;
currentCount = pendingOff;
} else {
currentCount = values[vOff].size();
for (int k = 0; k < currentCount; ) {
k += get(vOff, k, currentValues, k, currentCount - k);
}
}
}
/**
* Whether or not there are remaining values.
*/
public final boolean hasNext() {
return pOff < currentCount;
}
/**
* Return the next long in the buffer.
*/
public final long next() {
assert hasNext();
long result = currentValues[pOff++];
if (pOff == currentCount) {
vOff += 1;
pOff = 0;
if (vOff <= valuesOff) {
fillValues();
} else {
currentCount = 0;
}
}
return result;
}
}
long baseRamBytesUsed() {
return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // the 2 arrays
+ 2 * RamUsageEstimator.NUM_BYTES_INT // the 2 offsets
+ 2 * RamUsageEstimator.NUM_BYTES_INT // pageShift, pageMask
+ RamUsageEstimator.NUM_BYTES_FLOAT // acceptable overhead
+ RamUsageEstimator.NUM_BYTES_LONG; // valuesBytes
}
/**
* Return the number of bytes used by this instance.
*/
public long ramBytesUsed() {
// TODO: this is called per-doc-per-norms/dv-field, can we optimize this?
long bytesUsed = RamUsageEstimator.alignObjectSize(baseRamBytesUsed())
+ (pending != null ? RamUsageEstimator.sizeOf(pending) : 0L)
+ RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * values.length); // values
return bytesUsed + valuesBytes;
}
/**
* Pack all pending values in this buffer. Subsequent calls to {@link #add(long)} will fail.
*/
public void freeze() {
if (pendingOff > 0) {
if (values.length == valuesOff) {
grow(valuesOff + 1); // don't oversize!
}
packPendingValues();
valuesBytes += values[valuesOff].ramBytesUsed();
++valuesOff;
pendingOff = 0;
}
pending = null;
}
}
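
To make the paging arithmetic above concrete, a tiny standalone sketch (all numbers hypothetical) of how `get(long)` splits a global index into a page and an offset within that page via `pageShift` and `pageMask`:

```java
public class PagingSketch {
    public static void main(String[] args) {
        final int pageSize = 1024;                                     // must be a power of two
        final int pageShift = Integer.numberOfTrailingZeros(pageSize); // 10
        final int pageMask = pageSize - 1;                             // 1023

        final long index = 5000;                                       // hypothetical global index
        final int block = (int) (index >> pageShift);                  // page 4 (values 4096..5119)
        final int element = (int) (index & pageMask);                  // offset 904 within that page
        System.out.println("block=" + block + ", element=" + element);
    }
}
```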

View File: org/apache/lucene/util/packed/XAppendingPackedLongBuffer.java

@ -0,0 +1,107 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
/**
* Utility class to buffer a list of signed longs in memory. This class only
* supports appending and is optimized for non-negative numbers with a uniform distribution over a fixed (limited) range.
*
* @lucene.internal
*/
public final class XAppendingPackedLongBuffer extends XAbstractAppendingLongBuffer {
static {
// LUCENE MONITOR: this should be in Lucene 4.5.
assert Lucene.VERSION == Version.LUCENE_44 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
}
/**
* Create an {@link XAppendingPackedLongBuffer}.
*
* @param initialPageCount the initial number of pages
* @param pageSize the size of a single page
* @param acceptableOverheadRatio an acceptable overhead ratio per value
*/
public XAppendingPackedLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
super(initialPageCount, pageSize, acceptableOverheadRatio);
}
/**
* Create an {@link XAppendingPackedLongBuffer} with initialPageCount=16,
* pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
*/
public XAppendingPackedLongBuffer() {
this(16, 1024, PackedInts.DEFAULT);
}
/**
* Create an {@link XAppendingPackedLongBuffer} with initialPageCount=16,
* pageSize=1024
*/
public XAppendingPackedLongBuffer(float acceptableOverheadRatio) {
this(16, 1024, acceptableOverheadRatio);
}
@Override
long get(int block, int element) {
if (block == valuesOff) {
return pending[element];
} else {
return values[block].get(element);
}
}
@Override
int get(int block, int element, long[] arr, int off, int len) {
if (block == valuesOff) {
int sysCopyToRead = Math.min(len, pendingOff - element);
System.arraycopy(pending, element, arr, off, sysCopyToRead);
return sysCopyToRead;
} else {
/* packed block */
return values[block].get(element, arr, off, len);
}
}
@Override
void packPendingValues() {
// compute max delta
long minValue = pending[0];
long maxValue = pending[0];
for (int i = 1; i < pendingOff; ++i) {
minValue = Math.min(minValue, pending[i]);
maxValue = Math.max(maxValue, pending[i]);
}
// build a new packed reader
final int bitsRequired = minValue < 0 ? 64 : PackedInts.bitsRequired(maxValue);
final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
for (int i = 0; i < pendingOff; ) {
i += mutable.set(i, pending, i, pendingOff - i);
}
values[valuesOff] = mutable;
}
}
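
A hedged usage sketch of the buffer above (assuming these backported X* classes are on the classpath next to Lucene 4.4): values are appended, packed page by page, and frozen once writing is complete, mirroring how `MultiOrdinals` uses it further down in this commit:

```java
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.XAppendingPackedLongBuffer;

public class AppendingBufferSketch {
    public static void main(String[] args) {
        // 16 initial pages of 1024 values each, byte-aligned packing as in this commit
        XAppendingPackedLongBuffer ords = new XAppendingPackedLongBuffer(16, 1024, PackedInts.FASTEST);
        for (long i = 0; i < 10000; i++) {
            ords.add(i % 7); // small non-negative values need only a few bits per value
        }
        ords.freeze(); // packs the pending page; further add() calls throw IllegalStateException

        System.out.println("size = " + ords.size());         // 10000
        System.out.println("value[123] = " + ords.get(123)); // 123 % 7 == 4
        System.out.println("ram bytes = " + ords.ramBytesUsed());
    }
}
```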

View File: org/apache/lucene/util/packed/XMonotonicAppendingLongBuffer.java

@ -0,0 +1,166 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import java.util.Arrays;
/**
* Utility class to buffer signed longs in memory, which is optimized for the
* case where the sequence is monotonic, although it can encode any sequence of
* arbitrary longs. It only supports appending.
*
* @lucene.internal
*/
public final class XMonotonicAppendingLongBuffer extends XAbstractAppendingLongBuffer {
static {
// LUCENE MONITOR: this should be in Lucene 4.5.
assert Lucene.VERSION == Version.LUCENE_44 : "Elasticsearch has upgraded to Lucene Version: [" + Lucene.VERSION + "] this class should be removed";
}
static long zigZagDecode(long n) {
return ((n >>> 1) ^ -(n & 1));
}
static long zigZagEncode(long n) {
return (n >> 63) ^ (n << 1);
}
float[] averages;
long[] minValues;
/**
* @param initialPageCount the initial number of pages
* @param pageSize the size of a single page
* @param acceptableOverheadRatio an acceptable overhead ratio per value
*/
public XMonotonicAppendingLongBuffer(int initialPageCount, int pageSize, float acceptableOverheadRatio) {
super(initialPageCount, pageSize, acceptableOverheadRatio);
averages = new float[values.length];
minValues = new long[values.length];
}
/**
* Create an {@link XMonotonicAppendingLongBuffer} with initialPageCount=16,
* pageSize=1024 and acceptableOverheadRatio={@link PackedInts#DEFAULT}
*/
public XMonotonicAppendingLongBuffer() {
this(16, 1024, PackedInts.DEFAULT);
}
/**
* Create an {@link XMonotonicAppendingLongBuffer} with initialPageCount=16,
* pageSize=1024
*/
public XMonotonicAppendingLongBuffer(float acceptableOverheadRatio) {
this(16, 1024, acceptableOverheadRatio);
}
@Override
long get(int block, int element) {
if (block == valuesOff) {
return pending[element];
} else {
final long base = minValues[block] + (long) (averages[block] * (long) element);
if (values[block] == null) {
return base;
} else {
return base + zigZagDecode(values[block].get(element));
}
}
}
@Override
int get(int block, int element, long[] arr, int off, int len) {
if (block == valuesOff) {
int sysCopyToRead = Math.min(len, pendingOff - element);
System.arraycopy(pending, element, arr, off, sysCopyToRead);
return sysCopyToRead;
} else {
if (values[block] == null) {
int toFill = Math.min(len, pending.length - element);
for (int r = 0; r < toFill; r++, off++, element++) {
arr[off] = minValues[block] + (long) (averages[block] * (long) element);
}
return toFill;
} else {
/* packed block */
int read = values[block].get(element, arr, off, len);
for (int r = 0; r < read; r++, off++, element++) {
arr[off] = minValues[block] + (long) (averages[block] * (long) element) + zigZagDecode(arr[off]);
}
return read;
}
}
}
@Override
void grow(int newBlockCount) {
super.grow(newBlockCount);
this.averages = Arrays.copyOf(averages, newBlockCount);
this.minValues = Arrays.copyOf(minValues, newBlockCount);
}
@Override
void packPendingValues() {
assert pendingOff > 0;
minValues[valuesOff] = pending[0];
averages[valuesOff] = pendingOff == 1 ? 0 : (float) (pending[pendingOff - 1] - pending[0]) / (pendingOff - 1);
for (int i = 0; i < pendingOff; ++i) {
pending[i] = zigZagEncode(pending[i] - minValues[valuesOff] - (long) (averages[valuesOff] * (long) i));
}
long maxDelta = 0;
for (int i = 0; i < pendingOff; ++i) {
if (pending[i] < 0) {
maxDelta = -1;
break;
} else {
maxDelta = Math.max(maxDelta, pending[i]);
}
}
if (maxDelta == 0) {
values[valuesOff] = new PackedInts.NullReader(pendingOff);
} else {
final int bitsRequired = maxDelta < 0 ? 64 : PackedInts.bitsRequired(maxDelta);
final PackedInts.Mutable mutable = PackedInts.getMutable(pendingOff, bitsRequired, acceptableOverheadRatio);
for (int i = 0; i < pendingOff; ) {
i += mutable.set(i, pending, i, pendingOff - i);
}
values[valuesOff] = mutable;
}
}
@Override
long baseRamBytesUsed() {
return super.baseRamBytesUsed()
+ 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF; // 2 additional arrays
}
@Override
public long ramBytesUsed() {
return super.ramBytesUsed()
+ RamUsageEstimator.sizeOf(averages) + RamUsageEstimator.sizeOf(minValues);
}
}
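
The monotonic buffer above stores, per page, a minimum value and an average slope, and only packs the zig-zag encoded deviations from that fitted line. A small self-contained sketch (hypothetical page contents) of the encode/decode round trip used by `packPendingValues` and `get`:

```java
public class MonotonicEncodingSketch {
    // Same zig-zag transform as XMonotonicAppendingLongBuffer above.
    static long zigZagEncode(long n) { return (n >> 63) ^ (n << 1); }
    static long zigZagDecode(long n) { return (n >>> 1) ^ -(n & 1); }

    public static void main(String[] args) {
        long[] page = {100, 103, 106, 112, 115}; // hypothetical monotonic page
        long min = page[0];
        float average = (float) (page[page.length - 1] - page[0]) / (page.length - 1); // 3.75

        // Encode: deviation of each value from the fitted line, zig-zagged so it is non-negative.
        long[] deltas = new long[page.length];
        for (int i = 0; i < page.length; i++) {
            deltas[i] = zigZagEncode(page[i] - min - (long) (average * (long) i));
        }

        // Decode: base value from the line plus the decoded deviation, as in get(block, element).
        for (int i = 0; i < page.length; i++) {
            long restored = min + (long) (average * (long) i) + zigZagDecode(deltas[i]);
            System.out.println(page[i] + " -> delta " + deltas[i] + " -> " + restored);
        }
    }
}
```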

View File: org/elasticsearch/index/fielddata/ordinals/MultiOrdinals.java

@ -22,9 +22,9 @@ package org.elasticsearch.index.fielddata.ordinals;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.AppendingLongBuffer;
import org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.XAppendingPackedLongBuffer;
import org.apache.lucene.util.packed.XMonotonicAppendingLongBuffer;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter;
/**
@ -38,13 +38,16 @@ public class MultiOrdinals implements Ordinals {
/**
* Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%.
*/
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds) {
final int bitsPerOrd = PackedInts.bitsRequired(numOrds);
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds, float acceptableOverheadRatio) {
int bitsPerOrd = PackedInts.bitsRequired(numOrds);
bitsPerOrd = PackedInts.fastestFormatAndBits(numDocsWithValue, bitsPerOrd, acceptableOverheadRatio).bitsPerValue;
// Compute the worst-case number of bits per value for offsets, e.g. if no docs have a value at the
// beginning of the block and all docs have one at the end of the block
final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
final int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
int bitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
bitsPerOffset = PackedInts.fastestFormatAndBits(maxDoc, bitsPerOffset, acceptableOverheadRatio).bitsPerValue;
final long expectedMultiSizeInBytes = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
final long expectedSingleSizeInBytes = (long) maxDoc * bitsPerOrd;
return expectedMultiSizeInBytes < 0.8f * expectedSingleSizeInBytes;
@ -52,14 +55,14 @@ public class MultiOrdinals implements Ordinals {
private final boolean multiValued;
private final long numOrds;
private final MonotonicAppendingLongBuffer endOffsets;
private final AppendingLongBuffer ords;
private final XMonotonicAppendingLongBuffer endOffsets;
private final XAppendingPackedLongBuffer ords;
public MultiOrdinals(OrdinalsBuilder builder) {
public MultiOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) {
multiValued = builder.getNumMultiValuesDocs() > 0;
numOrds = builder.getNumOrds();
endOffsets = new MonotonicAppendingLongBuffer();
ords = new AppendingLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE);
endOffsets = new XMonotonicAppendingLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
ords = new XAppendingPackedLongBuffer(OFFSET_INIT_PAGE_COUNT, OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
long lastEndOffset = 0;
for (int i = 0; i < builder.maxDoc(); ++i) {
final LongsRef docOrds = builder.docOrds(i);
@ -117,8 +120,8 @@ public class MultiOrdinals implements Ordinals {
static class MultiDocs implements Ordinals.Docs {
private final MultiOrdinals ordinals;
private final MonotonicAppendingLongBuffer endOffsets;
private final AppendingLongBuffer ords;
private final XMonotonicAppendingLongBuffer endOffsets;
private final XAppendingPackedLongBuffer ords;
private final LongsRef longsScratch;
private final MultiIter iter;
@ -195,10 +198,10 @@ public class MultiOrdinals implements Ordinals {
static class MultiIter implements Iter {
final AppendingLongBuffer ordinals;
final XAppendingPackedLongBuffer ordinals;
long offset, endOffset;
MultiIter(AppendingLongBuffer ordinals) {
MultiIter(XAppendingPackedLongBuffer ordinals) {
this.ordinals = ordinals;
}
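
The layout used here: `endOffsets` is a monotonic buffer with one entry per document and `ords` is the flat concatenation of all ordinals, so a document's ordinals live between the previous document's end offset and its own. A tiny sketch of that indexing idea with plain arrays (hypothetical data, not the actual class):

```java
public class MultiOrdinalsLayoutSketch {
    public static void main(String[] args) {
        // Hypothetical per-document ordinals: doc0 -> {2,4}, doc1 -> {1}, doc2 -> {}, doc3 -> {3,5,7}
        long[] ords       = {2, 4, 1, 3, 5, 7}; // all ordinals, concatenated in doc order
        long[] endOffsets = {2, 3, 3, 6};       // monotonically increasing end offset per doc

        int doc = 3;
        long start = doc == 0 ? 0 : endOffsets[doc - 1]; // 3
        long end = endOffsets[doc];                      // 6
        StringBuilder sb = new StringBuilder("doc " + doc + " ords:");
        for (long i = start; i < end; i++) {
            sb.append(' ').append(ords[(int) i]);        // 3 5 7
        }
        System.out.println(sb);
    }
}
```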

View File: org/elasticsearch/index/fielddata/ordinals/OrdinalsBuilder.java

@ -40,11 +40,14 @@ import java.util.Comparator;
*/
public final class OrdinalsBuilder implements Closeable {
/** Default acceptable overhead ratio. {@link OrdinalsBuilder} memory usage is mostly transient so it is likely a better trade-off to
* trade memory for speed in order to resize less often. */
/**
* Default acceptable overhead ratio. {@link OrdinalsBuilder} memory usage is mostly transient so it is likely a better trade-off to
* trade memory for speed in order to resize less often.
*/
public static final float DEFAULT_ACCEPTABLE_OVERHEAD_RATIO = PackedInts.FAST;
/** The following structure is used to store ordinals. The idea is to store ords on levels of increasing sizes. Level 0 stores
/**
* The following structure is used to store ordinals. The idea is to store ords on levels of increasing sizes. Level 0 stores
* 1 value and 1 pointer to level 1. Level 1 stores 2 values and 1 pointer to level 2, ..., Level n stores 2**n values and
* 1 pointer to level n+1. If at some point an ordinal or a pointer has 0 as a value, this means that there are no remaining
* values. On the first level, ordinals.get(docId) is the first ordinal for docId or 0 if the document has no ordinals. On
@ -67,7 +70,7 @@ public final class OrdinalsBuilder implements Closeable {
* with document 2: it has 2 more ordinals on level 1: 3 and 4 and its next level index is 1 meaning that there are remaining
* ordinals on the next level. On level 2 at index 1, we can read [5 0 0 0] meaning that 5 is an ordinal as well, but the
* fact that it is followed by zeros means that there are no more ordinals. In the end, document 2 has 2, 3, 4 and 5 as ordinals.
*
* <p/>
* In addition to these structures, there is another array which stores the current position (level + slice + offset in the slice)
* in order to be able to append data in constant time.
*/
@ -75,7 +78,9 @@ public final class OrdinalsBuilder implements Closeable {
private static final int PAGE_SIZE = 1 << 12;
/** Number of slots at <code>level</code> */
/**
* Number of slots at <code>level</code>
*/
private static int numSlots(int level) {
return 1 << level;
}
@ -84,34 +89,46 @@ public final class OrdinalsBuilder implements Closeable {
return numSlots(level) - 1;
}
/** Encode the position for the given level and offset. The idea is to encode the level using unary coding in the lower bits and
* then the offset in the higher bits. */
/**
* Encode the position for the given level and offset. The idea is to encode the level using unary coding in the lower bits and
* then the offset in the higher bits.
*/
private static long position(int level, long offset) {
assert level >= 1;
return (1 << (level - 1)) | (offset << level);
}
/** Decode the level from an encoded position. */
/**
* Decode the level from an encoded position.
*/
private static int level(long position) {
return 1 + Long.numberOfTrailingZeros(position);
}
/** Decode the offset from the position. */
/**
* Decode the offset from the position.
*/
private static long offset(long position, int level) {
return position >>> level;
}
/** Get the ID of the slice given an offset. */
/**
* Get the ID of the slice given an offset.
*/
private static long sliceID(int level, long offset) {
return offset >>> level;
}
/** Compute the first offset of the given slice. */
/**
* Compute the first offset of the given slice.
*/
private static long startOffset(int level, long slice) {
return slice << level;
}
/** Compute the number of ordinals stored for a value given its current position. */
/**
* Compute the number of ordinals stored for a value given its current position.
*/
private static int numOrdinals(int level, long offset) {
return (1 << level) + (int) (offset & slotsMask(level));
}
@ -141,7 +158,9 @@ public final class OrdinalsBuilder implements Closeable {
Arrays.fill(sizes, 1); // reserve the 1st slice on every level
}
/** Allocate a new slice and return its ID. */
/**
* Allocate a new slice and return its ID.
*/
private long newSlice(int level) {
final long newSlice = sizes[level]++;
// Lazily allocate ordinals
@ -257,7 +276,7 @@ public final class OrdinalsBuilder implements Closeable {
ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio);
spare = new LongsRef();
}
public OrdinalsBuilder(int maxDoc, float acceptableOverheadRatio) throws IOException {
this(-1, maxDoc, acceptableOverheadRatio);
}
@ -275,7 +294,9 @@ public final class OrdinalsBuilder implements Closeable {
return spare;
}
/** Return a {@link PackedInts.Reader} instance mapping every doc ID to its first ordinal if it exists and 0 otherwise. */
/**
* Return a {@link PackedInts.Reader} instance mapping every doc ID to its first ordinal if it exists and 0 otherwise.
*/
public PackedInts.Reader getFirstOrdinals() {
return ordinals.firstOrdinals;
}
@ -287,7 +308,7 @@ public final class OrdinalsBuilder implements Closeable {
public long nextOrdinal() {
return ++currentOrd;
}
/**
* Returns the current ordinal or <tt>0</tt> if this builder has not been advanced via
* {@link #nextOrdinal()}.
@ -297,7 +318,7 @@ public final class OrdinalsBuilder implements Closeable {
}
/**
* Associates the given document id with the current ordinal.
* Associates the given document id with the current ordinal.
*/
public OrdinalsBuilder addDoc(int doc) {
totalNumOrds++;
@ -346,7 +367,7 @@ public final class OrdinalsBuilder implements Closeable {
}
/**
* Returns the number of distinct ordinals in this builder.
* Returns the number of distinct ordinals in this builder.
*/
public long getNumOrds() {
return currentOrd;
@ -370,13 +391,13 @@ public final class OrdinalsBuilder implements Closeable {
}
/**
* Builds an {@link Ordinals} instance from the builder's current state.
* Builds an {@link Ordinals} instance from the builder's current state.
*/
public Ordinals build(Settings settings) {
final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
if (numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getNumOrds())) {
final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.FASTEST);
if (numMultiValuedDocs > 0 || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getNumOrds(), acceptableOverheadRatio)) {
// MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields
return new MultiOrdinals(this);
return new MultiOrdinals(this, acceptableOverheadRatio);
} else {
return new SinglePackedOrdinals(this, acceptableOverheadRatio);
}
@ -388,9 +409,10 @@ public final class OrdinalsBuilder implements Closeable {
public int maxDoc() {
return maxDoc;
}
/**
* A {@link TermsEnum} that iterates only full precision prefix coded 64 bit values.
*
* @see #buildFromTerms(TermsEnum, Bits)
*/
public static TermsEnum wrapNumeric64Bit(TermsEnum termsEnum) {
@ -405,11 +427,12 @@ public final class OrdinalsBuilder implements Closeable {
/**
* A {@link TermsEnum} that iterates only full precision prefix coded 32 bit values.
*
* @see #buildFromTerms(TermsEnum, Bits)
*/
public static TermsEnum wrapNumeric32Bit(TermsEnum termsEnum) {
return new FilteredTermsEnum(termsEnum, false) {
@Override
protected AcceptStatus accept(BytesRef term) throws IOException {
// we stop accepting terms once we moved across the prefix codec terms - redundant values!
@ -444,7 +467,7 @@ public final class OrdinalsBuilder implements Closeable {
docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
nextOrdinal();
int docId;
while((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
while ((docId = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
addDoc(docId);
}
}
@ -457,7 +480,7 @@ public final class OrdinalsBuilder implements Closeable {
}
};
}
/**
* Closes this builder and release all resources.
*/
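
The `position`/`level`/`offset` helpers shown in this diff encode the level in unary in the low bits and the offset in the remaining high bits. A brief round-trip sketch (hypothetical level/offset pairs) illustrating why both can be recovered from a single long:

```java
public class OrdinalsPositionSketch {
    // Same encoding as the private helpers in OrdinalsBuilder above.
    static long position(int level, long offset) { return (1 << (level - 1)) | (offset << level); }
    static int level(long position) { return 1 + Long.numberOfTrailingZeros(position); }
    static long offset(long position, int level) { return position >>> level; }

    public static void main(String[] args) {
        int[] levels = {1, 2, 3, 5};       // hypothetical levels
        long[] offsets = {0, 7, 42, 1000}; // hypothetical offsets
        for (int i = 0; i < levels.length; i++) {
            long pos = position(levels[i], offsets[i]);
            // The lowest set bit gives the level; shifting it away restores the offset.
            System.out.println("level=" + level(pos) + " offset=" + offset(pos, level(pos))
                    + " (encoded as " + pos + ")");
        }
    }
}
```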

View File: org/elasticsearch/test/unit/index/fielddata/ordinals/MultiOrdinalsTests.java

@ -20,6 +20,7 @@
package org.elasticsearch.test.unit.index.fielddata.ordinals;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.PackedInts;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.ordinals.MultiOrdinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
@ -118,7 +119,7 @@ public class MultiOrdinalsTests extends ElasticsearchTestCase {
assertThat(docs.getOrd(docId), equalTo(docOrds.get(0)));
LongsRef ref = docs.getOrds(docId);
assertThat(ref.offset, equalTo(0));
for (int i = ref.offset; i < ref.length; i++) {
assertThat("index: " + i + " offset: " + ref.offset + " len: " + ref.length, ref.longs[i], equalTo(docOrds.get(i)));
}
@ -160,17 +161,22 @@ public class MultiOrdinalsTests extends ElasticsearchTestCase {
@Override
public boolean equals(Object obj) {
if (this == obj)
if (this == obj) {
return true;
if (obj == null)
}
if (obj == null) {
return false;
if (getClass() != obj.getClass())
}
if (getClass() != obj.getClass()) {
return false;
}
OrdAndId other = (OrdAndId) obj;
if (id != other.id)
if (id != other.id) {
return false;
if (ord != other.ord)
}
if (ord != other.ord) {
return false;
}
return true;
}
}
@ -196,15 +202,15 @@ public class MultiOrdinalsTests extends ElasticsearchTestCase {
builder.nextOrdinal();
builder.addDoc(5).addDoc(6);
}
long[][] ordinalPlan = new long[][] {
long[][] ordinalPlan = new long[][]{
{2, 4},
{1},
{3},
{},
{1, 3, 4, 5, 6},
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32},
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
};
Ordinals ordinals = creationMultiOrdinals(builder);
@ -249,18 +255,18 @@ public class MultiOrdinalsTests extends ElasticsearchTestCase {
builder.addDoc(6);
}
}
long[][] ordinalPlan = new long[][] {
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15},
long[][] ordinalPlan = new long[][]{
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{1},
{1,2,3,4,5},
{1,2,3,4,5,6},
{1, 2, 3, 4, 5},
{1, 2, 3, 4, 5, 6},
{2},
{1,2,3,4,5,6,7,8,9,10}
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
};
Ordinals ordinals = new MultiOrdinals(builder);
Ordinals ordinals = new MultiOrdinals(builder, PackedInts.FASTEST);
Ordinals.Docs docs = ordinals.ordinals();
assertEquals(docs, ordinalPlan);
}