mirror of https://github.com/apache/lucene.git
LUCENE-9932: Performance improvement for BKD index building (#91)
parent f215a55bc9
commit fd4b3c81d5
CHANGES.txt:

@@ -368,6 +368,8 @@ Improvements

Optimizations
---------------------

+* LUCENE-9932: Performance improvement for BKD index building (neoremind)
+
* LUCENE-9827: Speed up merging of stored fields and term vectors for smaller segments.
  (Daniel Mitterdorfer, Dimitrios Liapis, Adrien Grand, Robert Muir)

MutablePointValues.java:

@@ -41,4 +41,10 @@ public abstract class MutablePointValues extends PointValues {

  /** Swap the i-th and j-th values. */
  public abstract void swap(int i, int j);
+
+  /** Save the i-th value into the j-th position in temporary storage. */
+  public abstract void save(int i, int j);
+
+  /** Restore values between the i-th (inclusive) and j-th (exclusive) positions from temporary storage into the original storage. */
+  public abstract void restore(int i, int j);
}
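The new save/restore pair lets a sorter stage values in scratch space during a stable reorder instead of moving them one swap at a time. As a minimal sketch of the contract (hypothetical code, not part of this commit), an implementation backed by a plain array could look like this:

// Hypothetical illustration of the save/restore contract; the class name is made up.
class LongValuesExample {
  private final long[] values;
  private long[] temp; // scratch space, allocated lazily on first save

  LongValuesExample(long[] values) {
    this.values = values;
  }

  /** Save the i-th value into the j-th slot of temporary storage. */
  void save(int i, int j) {
    if (temp == null) {
      temp = new long[values.length];
    }
    temp[j] = values[i];
  }

  /** Copy the range [i, j) of temporary storage back over the original values. */
  void restore(int i, int j) {
    if (temp != null) {
      System.arraycopy(temp, i, values, i, j - i);
    }
  }
}

PointValuesWriter below implements exactly this pattern over its ords indirection array.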
PointValuesWriter.java:

@@ -89,6 +89,7 @@ class PointValuesWriter {
    PointValues points =
        new MutablePointValues() {
          final int[] ords = new int[numPoints];
+         int[] temp;

          {
            for (int i = 0; i < numPoints; ++i) {

@@ -172,6 +173,21 @@ class PointValuesWriter {
            final long offset = (long) packedBytesLength * ords[i] + k;
            return bytes.readByte(offset);
          }
+
+         @Override
+         public void save(int i, int j) {
+           if (temp == null) {
+             temp = new int[ords.length];
+           }
+           temp[j] = ords[i];
+         }
+
+         @Override
+         public void restore(int i, int j) {
+           if (temp != null) {
+             System.arraycopy(temp, i, ords, i, j - i);
+           }
+         }
        };

    final PointValues values;

@@ -291,5 +307,15 @@ class PointValuesWriter {
      public void swap(int i, int j) {
        in.swap(i, j);
      }
+
+     @Override
+     public void save(int i, int j) {
+       in.save(i, j);
+     }
+
+     @Override
+     public void restore(int i, int j) {
+       in.restore(i, j);
+     }
    }
  }
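Note that this writer-side implementation sorts the ords indirection array rather than the packed values themselves, so save and restore each copy a single int ordinal; the temp scratch array is allocated lazily, costing one extra int per point only when the stable reorder is actually exercised.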
MSBRadixSorter.java:

@@ -33,7 +33,7 @@ public abstract class MSBRadixSorter extends Sorter {
  // locality)
  private static final int LEVEL_THRESHOLD = 8;
  // size of histograms: 256 + 1 to indicate that the string is finished
- private static final int HISTOGRAM_SIZE = 257;
+ protected static final int HISTOGRAM_SIZE = 257;
  // buckets below this size will be sorted with introsort
  private static final int LENGTH_THRESHOLD = 100;

@@ -42,7 +42,7 @@ public abstract class MSBRadixSorter extends Sorter {
  private final int[] endOffsets = new int[HISTOGRAM_SIZE];
  private final int[] commonPrefix;

- private final int maxLength;
+ protected final int maxLength;

  /**
   * Sole constructor.

@@ -128,7 +128,7 @@ public abstract class MSBRadixSorter extends Sorter {
    sort(from, to, 0, 0);
  }

- private void sort(int from, int to, int k, int l) {
+ protected void sort(int from, int to, int k, int l) {
    if (to - from <= LENGTH_THRESHOLD || l >= LEVEL_THRESHOLD) {
      introSort(from, to, k);
    } else {

@@ -202,7 +202,7 @@ public abstract class MSBRadixSorter extends Sorter {
  }

  /** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
- private int getBucket(int i, int k) {
+ protected int getBucket(int i, int k) {
    return byteAt(i, k) + 1;
  }

@@ -284,7 +284,7 @@ public abstract class MSBRadixSorter extends Sorter {
   * @param startOffsets start offsets per bucket
   * @param endOffsets end offsets per bucket
   */
- private void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
+ protected void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
    // reorder in place, like the dutch flag problem
    for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
      final int limit = endOffsets[i];
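These visibility changes (private to protected on HISTOGRAM_SIZE, maxLength, sort, getBucket, and reorder) widen the extension surface of MSBRadixSorter so that the StableMSBRadixSorter introduced below can reuse the histogram machinery and override only the reordering step.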
StableMSBRadixSorter.java (new file):

@@ -0,0 +1,81 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;

/**
 * Stable radix sorter for variable-length strings.
 *
 * @lucene.internal
 */
public abstract class StableMSBRadixSorter extends MSBRadixSorter {

  private final int[] fixedStartOffsets;

  public StableMSBRadixSorter(int maxLength) {
    super(maxLength);
    fixedStartOffsets = new int[HISTOGRAM_SIZE];
  }

  /** Save the i-th value into the j-th position in temporary storage. */
  protected abstract void save(int i, int j);

  /** Restore values between the i-th (inclusive) and j-th (exclusive) positions from temporary storage into the original storage. */
  protected abstract void restore(int i, int j);

  @Override
  protected Sorter getFallbackSorter(int k) {
    return new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        StableMSBRadixSorter.this.swap(i, j);
      }

      @Override
      protected int compare(int i, int j) {
        for (int o = k; o < maxLength; ++o) {
          final int b1 = byteAt(i, o);
          final int b2 = byteAt(j, o);
          if (b1 != b2) {
            return b1 - b2;
          } else if (b1 == -1) {
            break;
          }
        }
        return 0;
      }
    };
  }

  /**
   * Reorder elements in a stable way, since the Dutch flag approach used by the parent class does
   * not guarantee ordering for equal values.
   *
   * <p>When this method returns, startOffsets and endOffsets are equal.
   */
  @Override
  protected void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
    System.arraycopy(startOffsets, 0, fixedStartOffsets, 0, startOffsets.length);
    for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
      final int limit = endOffsets[i];
      for (int h1 = fixedStartOffsets[i]; h1 < limit; h1++) {
        final int b = getBucket(from + h1, k);
        final int h2 = startOffsets[b]++;
        save(from + h1, from + h2);
      }
    }
    restore(from, to);
  }
}
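A usage sketch (assumed, not part of this commit): a subclass supplies byteAt and swap as with MSBRadixSorter, plus the new save/restore hooks, and inherits a stable most-significant-byte radix sort. Stably sorting an array of byte strings could look like this, with byteAt returning -1 past the end of a string:

import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.StableMSBRadixSorter;

public class StableSortExample {
  public static void main(String[] args) {
    final byte[][] data = {
      "banana".getBytes(StandardCharsets.UTF_8),
      "apple".getBytes(StandardCharsets.UTF_8),
      "apricot".getBytes(StandardCharsets.UTF_8),
    };
    new StableMSBRadixSorter(Integer.MAX_VALUE) {
      private byte[][] temp; // scratch space for the stable reorder

      @Override
      protected int byteAt(int i, int k) {
        // -1 marks the end of the string so that shorter strings sort first.
        return k < data[i].length ? data[i][k] & 0xff : -1;
      }

      @Override
      protected void swap(int i, int j) {
        byte[] tmp = data[i];
        data[i] = data[j];
        data[j] = tmp;
      }

      @Override
      protected void save(int i, int j) {
        if (temp == null) {
          temp = new byte[data.length][];
        }
        temp[j] = data[i];
      }

      @Override
      protected void restore(int i, int j) {
        if (temp != null) {
          System.arraycopy(temp, i, data, i, j - i);
        }
      }
    }.sort(0, data.length);
    for (byte[] s : data) {
      System.out.println(new String(s, StandardCharsets.UTF_8)); // apple, apricot, banana
    }
  }
}

The stable reorder writes each element into its destination bucket slot in scratch space during a single left-to-right pass and then copies the range back, preserving the relative order of equal keys at the cost of O(n) extra memory; the InPlaceMergeSorter fallback is likewise stable, unlike the IntroSorter fallback of the parent class.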
MutablePointsReaderUtils.java:

@@ -21,9 +21,9 @@ import org.apache.lucene.codecs.MutablePointValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntroSelector;
import org.apache.lucene.util.IntroSorter;
-import org.apache.lucene.util.MSBRadixSorter;
import org.apache.lucene.util.RadixSelector;
import org.apache.lucene.util.Selector;
+import org.apache.lucene.util.StableMSBRadixSorter;
import org.apache.lucene.util.packed.PackedInts;

/**

@@ -38,14 +38,39 @@ public final class MutablePointsReaderUtils {
  /** Sort the given {@link MutablePointValues} based on its packed value then doc ID. */
  public static void sort(
      BKDConfig config, int maxDoc, MutablePointValues reader, int from, int to) {
-   final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
-   new MSBRadixSorter(config.packedBytesLength + (bitsPerDocId + 7) / 8) {
+   boolean sortedByDocID = true;
+   int prevDoc = 0;
+   for (int i = from; i < to; ++i) {
+     int doc = reader.getDocID(i);
+     if (doc < prevDoc) {
+       sortedByDocID = false;
+       break;
+     }
+     prevDoc = doc;
+   }
+
+   // No need to tie break on doc IDs if already sorted by doc ID, since we use a stable sort.
+   // This should be a common situation as IndexWriter accumulates data in doc ID order when
+   // index sorting is not enabled.
+   final int bitsPerDocId = sortedByDocID ? 0 : PackedInts.bitsRequired(maxDoc - 1);
+   new StableMSBRadixSorter(config.packedBytesLength + (bitsPerDocId + 7) / 8) {

      @Override
      protected void swap(int i, int j) {
        reader.swap(i, j);
      }
+
+     @Override
+     protected void save(int i, int j) {
+       reader.save(i, j);
+     }
+
+     @Override
+     protected void restore(int i, int j) {
+       reader.restore(i, j);
+     }

      @Override
      protected int byteAt(int i, int k) {
        if (k < config.packedBytesLength) {

@@ -55,46 +80,6 @@ public final class MutablePointsReaderUtils {
        return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
      }
    }
-
-   @Override
-   protected org.apache.lucene.util.Sorter getFallbackSorter(int k) {
-     return new IntroSorter() {
-
-       final BytesRef pivot = new BytesRef();
-       final BytesRef scratch = new BytesRef();
-       int pivotDoc;
-
-       @Override
-       protected void swap(int i, int j) {
-         reader.swap(i, j);
-       }
-
-       @Override
-       protected void setPivot(int i) {
-         reader.getValue(i, pivot);
-         pivotDoc = reader.getDocID(i);
-       }
-
-       @Override
-       protected int comparePivot(int j) {
-         if (k < config.packedBytesLength) {
-           reader.getValue(j, scratch);
-           int cmp =
-               Arrays.compareUnsigned(
-                   pivot.bytes,
-                   pivot.offset + k,
-                   pivot.offset + k + config.packedBytesLength - k,
-                   scratch.bytes,
-                   scratch.offset + k,
-                   scratch.offset + k + config.packedBytesLength - k);
-           if (cmp != 0) {
-             return cmp;
-           }
-         }
-         return pivotDoc - reader.getDocID(j);
-       }
-     };
-   }
    }.sort(from, to);
  }
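The rewritten sort method above shows the key win: because the sorter is now stable, doc IDs no longer need to be appended to the radix key when the input already arrives in doc ID order, so the key gets shorter and the tie-break bytes are skipped entirely. A toy illustration of that reasoning (hypothetical, using the JDK's stable object sort rather than Lucene's classes):

import java.util.Arrays;
import java.util.Comparator;

public class StableTieBreakDemo {
  static final class Rec {
    final int value; // stands in for the packed point value
    final int docId;

    Rec(int value, int docId) {
      this.value = value;
      this.docId = docId;
    }

    @Override
    public String toString() {
      return "(" + value + ", doc=" + docId + ")";
    }
  }

  public static void main(String[] args) {
    // Input arrives in doc ID order, as IndexWriter typically accumulates it.
    Rec[] recs = {new Rec(7, 0), new Rec(3, 1), new Rec(7, 2), new Rec(3, 3)};
    // A stable sort on the value alone keeps equal values in doc ID order, so no
    // doc ID tie-break is needed: (3, doc=1) (3, doc=3) (7, doc=0) (7, doc=2).
    Arrays.sort(recs, Comparator.comparingInt((Rec r) -> r.value)); // stable for objects
    System.out.println(Arrays.toString(recs));
  }
}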
TestStableMSBRadixSorter.java (new file):

@@ -0,0 +1,211 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class TestStableMSBRadixSorter extends LuceneTestCase {

  private void test(BytesRef[] refs, int len) {
    BytesRef[] expected = ArrayUtil.copyOfSubArray(refs, 0, len);
    Arrays.sort(expected);

    int maxLength = 0;
    for (int i = 0; i < len; ++i) {
      BytesRef ref = refs[i];
      maxLength = Math.max(maxLength, ref.length);
    }
    switch (random().nextInt(3)) {
      case 0:
        maxLength += TestUtil.nextInt(random(), 1, 5);
        break;
      case 1:
        maxLength = Integer.MAX_VALUE;
        break;
      default:
        // leave unchanged
        break;
    }

    final int finalMaxLength = maxLength;
    new StableMSBRadixSorter(maxLength) {

      private BytesRef[] temp;

      @Override
      protected int byteAt(int i, int k) {
        assertTrue(k < finalMaxLength);
        BytesRef ref = refs[i];
        if (ref.length <= k) {
          return -1;
        }
        return ref.bytes[ref.offset + k] & 0xff;
      }

      @Override
      protected void swap(int i, int j) {
        BytesRef tmp = refs[i];
        refs[i] = refs[j];
        refs[j] = tmp;
      }

      @Override
      protected void save(int i, int j) {
        if (temp == null) {
          temp = new BytesRef[refs.length];
        }
        temp[j] = refs[i];
      }

      @Override
      protected void restore(int i, int j) {
        if (temp != null) {
          System.arraycopy(temp, i, refs, i, j - i);
        }
      }
    }.sort(0, len);
    BytesRef[] actual = ArrayUtil.copyOfSubArray(refs, 0, len);
    assertArrayEquals(expected, actual);
    // Verify not only that the arrays are equal after sorting with Arrays#sort and
    // StableMSBRadixSorter, but also that they hold the very same instance at every index.
    // This is different from MSBRadixSorter, which does not guarantee the ordering of equal
    // values.
    assertEquals(expected.length, actual.length);
    for (int i = 0; i < expected.length; i++) {
      assertSame(expected[i].bytes, actual[i].bytes);
    }
  }

  public void testEmpty() {
    test(new BytesRef[random().nextInt(5)], 0);
  }

  public void testOneValue() {
    BytesRef bytes = new BytesRef(TestUtil.randomSimpleString(random()));
    test(new BytesRef[] {bytes}, 1);
  }

  public void testTwoValues() {
    BytesRef bytes1 = new BytesRef(TestUtil.randomSimpleString(random()));
    BytesRef bytes2 = new BytesRef(TestUtil.randomSimpleString(random()));
    test(new BytesRef[] {bytes1, bytes2}, 2);
  }

  private void testRandom(int commonPrefixLen, int maxLen) {
    byte[] commonPrefix = new byte[commonPrefixLen];
    random().nextBytes(commonPrefix);
    final int len = random().nextInt(100000);
    BytesRef[] bytes = new BytesRef[len + random().nextInt(50)];
    for (int i = 0; i < len; ++i) {
      byte[] b = new byte[commonPrefixLen + random().nextInt(maxLen)];
      random().nextBytes(b);
      System.arraycopy(commonPrefix, 0, b, 0, commonPrefixLen);
      bytes[i] = new BytesRef(b);
    }
    test(bytes, len);
  }

  public void testRandom() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(0, 10);
    }
  }

  public void testRandomWithLotsOfDuplicates() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(0, 2);
    }
  }

  public void testRandomWithSharedPrefix() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(TestUtil.nextInt(random(), 1, 30), 10);
    }
  }

  public void testRandomWithSharedPrefixAndLotsOfDuplicates() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(TestUtil.nextInt(random(), 1, 30), 2);
    }
  }

  public void testRandom2() {
    // how large our alphabet is
    int letterCount = TestUtil.nextInt(random(), 2, 10);

    // how many substring fragments to use
    int substringCount = TestUtil.nextInt(random(), 2, 10);
    Set<BytesRef> substringsSet = new HashSet<>();

    // how many strings to make
    int stringCount = atLeast(10000);

    // System.out.println("letterCount=" + letterCount + " substringCount=" + substringCount + " stringCount=" + stringCount);
    while (substringsSet.size() < substringCount) {
      int length = TestUtil.nextInt(random(), 2, 10);
      byte[] bytes = new byte[length];
      for (int i = 0; i < length; i++) {
        bytes[i] = (byte) random().nextInt(letterCount);
      }
      BytesRef br = new BytesRef(bytes);
      substringsSet.add(br);
      // System.out.println("add substring count=" + substringsSet.size() + ": " + br);
    }

    BytesRef[] substrings = substringsSet.toArray(new BytesRef[substringsSet.size()]);
    double[] chance = new double[substrings.length];
    double sum = 0.0;
    for (int i = 0; i < substrings.length; i++) {
      chance[i] = random().nextDouble();
      sum += chance[i];
    }

    // give each substring a random chance of occurring:
    double accum = 0.0;
    for (int i = 0; i < substrings.length; i++) {
      accum += chance[i] / sum;
      chance[i] = accum;
    }

    Set<BytesRef> stringsSet = new HashSet<>();
    int iters = 0;
    while (stringsSet.size() < stringCount && iters < stringCount * 5) {
      int count = TestUtil.nextInt(random(), 1, 5);
      BytesRefBuilder b = new BytesRefBuilder();
      for (int i = 0; i < count; i++) {
        double v = random().nextDouble();
        accum = 0.0;
        for (int j = 0; j < substrings.length; j++) {
          accum += chance[j];
          if (accum >= v) {
            b.append(substrings[j]);
            break;
          }
        }
      }
      BytesRef br = b.toBytesRef();
      stringsSet.add(br);
      // System.out.println("add string count=" + stringsSet.size() + ": " + br);
      iters++;
    }

    test(stringsSet.toArray(new BytesRef[stringsSet.size()]), stringsSet.size());
  }
}
TestBKD.java:

@@ -1696,6 +1696,18 @@ public class TestBKD extends LuceneTestCase {

          @Override
          public byte getByteAt(int i, int k) {
            BytesRef b = new BytesRef();
            getValue(i, b);
            return b.bytes[b.offset + k];
          }
+
+         @Override
+         public void save(int i, int j) {
+           throw new UnsupportedOperationException();
+         }
+
+         @Override
+         public void restore(int i, int j) {
+           throw new UnsupportedOperationException();
+         }
        };

@@ -1839,6 +1851,16 @@ public class TestBKD extends LuceneTestCase {
          public int getDocCount() {
            return 11;
          }
+
+         @Override
+         public void save(int i, int j) {
+           throw new UnsupportedOperationException();
+         }
+
+         @Override
+         public void restore(int i, int j) {
+           throw new UnsupportedOperationException();
+         }
        };
        try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
          IllegalStateException ex =
TestMutablePointsReaderUtils.java:

@@ -28,16 +28,22 @@ import org.apache.lucene.util.TestUtil;
public class TestMutablePointsReaderUtils extends LuceneTestCase {

  public void testSort() {
-   for (int iter = 0; iter < 5; ++iter) {
-     doTestSort();
+   for (int iter = 0; iter < 10; ++iter) {
+     doTestSort(false);
    }
  }

- private void doTestSort() {
+ public void testSortWithIncrementalDocId() {
+   for (int iter = 0; iter < 10; ++iter) {
+     doTestSort(true);
+   }
+ }
+
+ private void doTestSort(boolean isDocIdIncremental) {
    final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
    final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
    BKDConfig config = new BKDConfig(1, 1, bytesPerDim, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
-   Point[] points = createRandomPoints(config, maxDoc, new int[1]);
+   Point[] points = createRandomPoints(config, maxDoc, new int[1], isDocIdIncremental);
    DummyPointsReader reader = new DummyPointsReader(points);
    MutablePointsReaderUtils.sort(config, maxDoc, reader, 0, points.length);
    Arrays.sort(

@@ -53,7 +59,23 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
          }
        });
    assertNotSame(points, reader.points);
-   assertArrayEquals(points, reader.points);
+   assertEquals(points.length, reader.points.length);
+
+   // Check that doc IDs are in ascending order among equal packed values.
+   // If doc IDs are already increasing, StableMSBRadixSorter should preserve their ordering.
+   // If doc IDs are not ordered, StableMSBRadixSorter should compare doc IDs to guarantee the
+   // ordering.
+   Point prevPoint = null;
+   for (int i = 0; i < points.length; i++) {
+     assertEquals(points[i].packedValue, reader.points[i].packedValue);
+     assertSame(points[i].packedValue, reader.points[i].packedValue);
+     if (prevPoint != null) {
+       if (reader.points[i].packedValue.equals(prevPoint.packedValue)) {
+         assertTrue(reader.points[i].doc >= prevPoint.doc);
+       }
+     }
+     prevPoint = reader.points[i];
+   }
  }

  public void testSortByDim() {

@@ -66,7 +88,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
    BKDConfig config = createRandomConfig();
    final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
    int[] commonPrefixLengths = new int[config.numDims];
-   Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths);
+   Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths, false);
    DummyPointsReader reader = new DummyPointsReader(points);
    final int sortedDim = random().nextInt(config.numIndexDims);
    MutablePointsReaderUtils.sortByDim(

@@ -119,7 +141,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
    BKDConfig config = createRandomConfig();
    int[] commonPrefixLengths = new int[config.numDims];
    final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
-   Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths);
+   Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths, false);
    final int splitDim = random().nextInt(config.numIndexDims);
    DummyPointsReader reader = new DummyPointsReader(points);
    final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);

@@ -180,15 +202,17 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
  }

  private static Point[] createRandomPoints(
-     BKDConfig config, int maxDoc, int[] commonPrefixLengths) {
+     BKDConfig config, int maxDoc, int[] commonPrefixLengths, boolean isDocIdIncremental) {
    assertTrue(commonPrefixLengths.length == config.numDims);
    final int numPoints = TestUtil.nextInt(random(), 1, 100000);
    Point[] points = new Point[numPoints];
-   if (random().nextInt(5) != 0) {
+   if (random().nextInt(10) != 0) {
      for (int i = 0; i < numPoints; ++i) {
        byte[] value = new byte[config.packedBytesLength];
        random().nextBytes(value);
-       points[i] = new Point(value, random().nextInt(maxDoc));
+       points[i] =
+           new Point(
+               value, isDocIdIncremental ? Math.min(i, maxDoc - 1) : random().nextInt(maxDoc));
      }
      for (int i = 0; i < config.numDims; ++i) {
        commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, config.bytesPerDim);

@@ -218,7 +242,8 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
        random().nextBytes(dataDims);
        System.arraycopy(
            dataDims, 0, value, config.packedIndexBytesLength, numDataDims * config.bytesPerDim);
-       points[i] = new Point(value, random().nextInt(maxDoc));
+       points[i] =
+           new Point(value, isDocIdIncremental ? Math.min(i, maxDoc - 1) : random().nextInt(maxDoc));
      }
      for (int i = 0; i < config.numIndexDims; ++i) {
        commonPrefixLengths[i] = config.bytesPerDim;

@@ -281,6 +306,8 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {

    private final Point[] points;

+   private Point[] temp;
+
    DummyPointsReader(Point[] points) {
      this.points = points.clone();
    }

@@ -352,5 +379,20 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
    public int getDocCount() {
      throw new UnsupportedOperationException();
    }
+
+   @Override
+   public void save(int i, int j) {
+     if (temp == null) {
+       temp = new Point[points.length];
+     }
+     temp[j] = points[i];
+   }
+
+   @Override
+   public void restore(int i, int j) {
+     if (temp != null) {
+       System.arraycopy(temp, i, points, i, j - i);
+     }
+   }
  }
}