mirror of https://github.com/apache/lucene.git
UCENE-8703: Build point writers in the BKD tree only when they are needed
This commit is contained in:
parent
3fcf7fa54e
commit
748d483e67
|
@ -16,6 +16,9 @@ Improvements
|
||||||
of byte arrays. In addition a new interface PointValue is added to abstract out
|
of byte arrays. In addition a new interface PointValue is added to abstract out
|
||||||
the different formats between offline and on-heap writers. (Ignacio Vera)
|
the different formats between offline and on-heap writers. (Ignacio Vera)
|
||||||
|
|
||||||
|
* LUCENE-8703: Build point writers in the BKD tree only when they are needed.
|
||||||
|
(Ignacio Vera)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-8680: Refactor EdgeTree#relateTriangle method. (Ignacio Vera)
|
* LUCENE-8680: Refactor EdgeTree#relateTriangle method. (Ignacio Vera)
|
||||||
|
|
|
@ -129,8 +129,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
|
|
||||||
protected final FixedBitSet docsSeen;
|
protected final FixedBitSet docsSeen;
|
||||||
|
|
||||||
private OfflinePointWriter offlinePointWriter;
|
private PointWriter pointWriter;
|
||||||
private HeapPointWriter heapPointWriter;
|
private boolean finished;
|
||||||
|
|
||||||
private IndexOutput tempInput;
|
private IndexOutput tempInput;
|
||||||
protected final int maxPointsInLeafNode;
|
protected final int maxPointsInLeafNode;
|
||||||
|
@ -147,7 +147,6 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
/** An upper bound on how many points the caller will add (includes deletions) */
|
/** An upper bound on how many points the caller will add (includes deletions) */
|
||||||
private final long totalPointCount;
|
private final long totalPointCount;
|
||||||
|
|
||||||
|
|
||||||
private final int maxDoc;
|
private final int maxDoc;
|
||||||
|
|
||||||
|
|
||||||
|
@ -187,9 +186,6 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
throw new IllegalArgumentException("maxMBSortInHeap=" + maxMBSortInHeap + " only allows for maxPointsSortInHeap=" + maxPointsSortInHeap + ", but this is less than maxPointsInLeafNode=" + maxPointsInLeafNode + "; either increase maxMBSortInHeap or decrease maxPointsInLeafNode");
|
throw new IllegalArgumentException("maxMBSortInHeap=" + maxMBSortInHeap + " only allows for maxPointsSortInHeap=" + maxPointsSortInHeap + ", but this is less than maxPointsInLeafNode=" + maxPointsInLeafNode + "; either increase maxMBSortInHeap or decrease maxPointsInLeafNode");
|
||||||
}
|
}
|
||||||
|
|
||||||
// We write first maxPointsSortInHeap in heap, then cutover to offline for additional points:
|
|
||||||
heapPointWriter = new HeapPointWriter(maxPointsSortInHeap, packedBytesLength);
|
|
||||||
|
|
||||||
this.maxMBSortInHeap = maxMBSortInHeap;
|
this.maxMBSortInHeap = maxMBSortInHeap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,39 +212,22 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** If the current segment has too many points then we spill over to temp files / offline sort. */
|
|
||||||
private void spillToOffline() throws IOException {
|
|
||||||
|
|
||||||
// For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
|
|
||||||
offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, "spill", 0);
|
|
||||||
tempInput = offlinePointWriter.out;
|
|
||||||
for(int i=0;i<pointCount;i++) {
|
|
||||||
offlinePointWriter.append(heapPointWriter.getPackedValueSlice(i));
|
|
||||||
}
|
|
||||||
heapPointWriter = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void add(byte[] packedValue, int docID) throws IOException {
|
public void add(byte[] packedValue, int docID) throws IOException {
|
||||||
if (packedValue.length != packedBytesLength) {
|
if (packedValue.length != packedBytesLength) {
|
||||||
throw new IllegalArgumentException("packedValue should be length=" + packedBytesLength + " (got: " + packedValue.length + ")");
|
throw new IllegalArgumentException("packedValue should be length=" + packedBytesLength + " (got: " + packedValue.length + ")");
|
||||||
}
|
}
|
||||||
|
if (pointCount >= totalPointCount) {
|
||||||
scratchBytesRef1.bytes = packedValue;
|
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + (pointCount + 1) + " values");
|
||||||
scratchBytesRef1.offset = 0;
|
|
||||||
scratchBytesRef1.length = packedBytesLength;
|
|
||||||
|
|
||||||
if (pointCount >= maxPointsSortInHeap) {
|
|
||||||
if (offlinePointWriter == null) {
|
|
||||||
spillToOffline();
|
|
||||||
}
|
|
||||||
offlinePointWriter.append(packedValue, docID);
|
|
||||||
} else {
|
|
||||||
// Not too many points added yet, continue using heap:
|
|
||||||
heapPointWriter.append(packedValue, docID);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: we could specialize for the 1D case:
|
|
||||||
if (pointCount == 0) {
|
if (pointCount == 0) {
|
||||||
|
assert pointWriter == null : "Point writer is already initialized";
|
||||||
|
//total point count is an estimation but the final point count must be equal or lower to that number.
|
||||||
|
if (totalPointCount > maxPointsSortInHeap) {
|
||||||
|
pointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, "spill", 0);
|
||||||
|
tempInput = ((OfflinePointWriter)pointWriter).out;
|
||||||
|
} else {
|
||||||
|
pointWriter = new HeapPointWriter(Math.toIntExact(totalPointCount), packedBytesLength);
|
||||||
|
}
|
||||||
System.arraycopy(packedValue, 0, minPackedValue, 0, packedIndexBytesLength);
|
System.arraycopy(packedValue, 0, minPackedValue, 0, packedIndexBytesLength);
|
||||||
System.arraycopy(packedValue, 0, maxPackedValue, 0, packedIndexBytesLength);
|
System.arraycopy(packedValue, 0, maxPackedValue, 0, packedIndexBytesLength);
|
||||||
} else {
|
} else {
|
||||||
|
@ -262,11 +241,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pointWriter.append(packedValue, docID);
|
||||||
pointCount++;
|
pointCount++;
|
||||||
if (pointCount > totalPointCount) {
|
|
||||||
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
|
|
||||||
}
|
|
||||||
docsSeen.set(docID);
|
docsSeen.set(docID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -297,12 +273,12 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Catch user silliness:
|
// Catch user silliness:
|
||||||
if (heapPointWriter == null && tempInput == null) {
|
if (finished == true) {
|
||||||
throw new IllegalStateException("already finished");
|
throw new IllegalStateException("already finished");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark that we already finished:
|
// Mark that we already finished:
|
||||||
heapPointWriter = null;
|
finished = true;
|
||||||
|
|
||||||
long countPerLeaf = pointCount = values.size();
|
long countPerLeaf = pointCount = values.size();
|
||||||
long innerNodeCount = 1;
|
long innerNodeCount = 1;
|
||||||
|
@ -394,12 +370,12 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Catch user silliness:
|
// Catch user silliness:
|
||||||
if (heapPointWriter == null && tempInput == null) {
|
if (finished == true) {
|
||||||
throw new IllegalStateException("already finished");
|
throw new IllegalStateException("already finished");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark that we already finished:
|
// Mark that we already finished:
|
||||||
heapPointWriter = null;
|
finished = true;
|
||||||
|
|
||||||
this.out = out;
|
this.out = out;
|
||||||
|
|
||||||
|
@ -577,24 +553,25 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
// TODO: specialize the 1D case? it's much faster at indexing time (no partitioning on recurse...)
|
// TODO: specialize the 1D case? it's much faster at indexing time (no partitioning on recurse...)
|
||||||
|
|
||||||
// Catch user silliness:
|
// Catch user silliness:
|
||||||
if (heapPointWriter == null && tempInput == null) {
|
|
||||||
throw new IllegalStateException("already finished");
|
|
||||||
}
|
|
||||||
|
|
||||||
BKDRadixSelector.PathSlice writer;
|
|
||||||
if (offlinePointWriter != null) {
|
|
||||||
offlinePointWriter.close();
|
|
||||||
writer = new BKDRadixSelector.PathSlice(offlinePointWriter, 0, pointCount);
|
|
||||||
tempInput = null;
|
|
||||||
} else {
|
|
||||||
writer = new BKDRadixSelector.PathSlice(heapPointWriter, 0, pointCount);
|
|
||||||
heapPointWriter = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pointCount == 0) {
|
if (pointCount == 0) {
|
||||||
throw new IllegalStateException("must index at least one point");
|
throw new IllegalStateException("must index at least one point");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Catch user silliness:
|
||||||
|
if (finished == true) {
|
||||||
|
throw new IllegalStateException("already finished");
|
||||||
|
}
|
||||||
|
|
||||||
|
//mark as finished
|
||||||
|
finished = true;
|
||||||
|
|
||||||
|
pointWriter.close();
|
||||||
|
BKDRadixSelector.PathSlice points = new BKDRadixSelector.PathSlice(pointWriter, 0, pointCount);
|
||||||
|
//clean up pointers
|
||||||
|
tempInput = null;
|
||||||
|
pointWriter = null;
|
||||||
|
|
||||||
|
|
||||||
long countPerLeaf = pointCount;
|
long countPerLeaf = pointCount;
|
||||||
long innerNodeCount = 1;
|
long innerNodeCount = 1;
|
||||||
|
|
||||||
|
@ -626,7 +603,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
try {
|
try {
|
||||||
|
|
||||||
|
|
||||||
build(1, numLeaves, writer, out,
|
build(1, numLeaves, points, out,
|
||||||
radixSelector, minPackedValue, maxPackedValue,
|
radixSelector, minPackedValue, maxPackedValue,
|
||||||
splitPackedValues, leafBlockFPs);
|
splitPackedValues, leafBlockFPs);
|
||||||
|
|
||||||
|
@ -829,8 +806,6 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
/** Pull a partition back into heap once the point count is low enough while recursing. */
|
/** Pull a partition back into heap once the point count is low enough while recursing. */
|
||||||
private HeapPointWriter switchToHeap(PointWriter source) throws IOException {
|
private HeapPointWriter switchToHeap(PointWriter source) throws IOException {
|
||||||
int count = Math.toIntExact(source.count());
|
int count = Math.toIntExact(source.count());
|
||||||
// Not inside the try because we don't want to close it here:
|
|
||||||
|
|
||||||
try (PointReader reader = source.getReader(0, count);
|
try (PointReader reader = source.getReader(0, count);
|
||||||
HeapPointWriter writer = new HeapPointWriter(count, packedBytesLength)) {
|
HeapPointWriter writer = new HeapPointWriter(count, packedBytesLength)) {
|
||||||
for(int i=0;i<count;i++) {
|
for(int i=0;i<count;i++) {
|
||||||
|
@ -983,11 +958,9 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
// Leaf node: write block
|
// Leaf node: write block
|
||||||
// We can write the block in any order so by default we write it sorted by the dimension that has the
|
// We can write the block in any order so by default we write it sorted by the dimension that has the
|
||||||
// least number of unique bytes at commonPrefixLengths[dim], which makes compression more efficient
|
// least number of unique bytes at commonPrefixLengths[dim], which makes compression more efficient
|
||||||
|
|
||||||
HeapPointWriter heapSource;
|
HeapPointWriter heapSource;
|
||||||
if (points.writer instanceof HeapPointWriter == false) {
|
if (points.writer instanceof HeapPointWriter == false) {
|
||||||
// Adversarial cases can cause this, e.g. very lopsided data, all equal points, such that we started
|
// Adversarial cases can cause this, e.g. merging big segments with most of the points deleted
|
||||||
// offline, but then kept splitting only in one dimension, and so never had to rewrite into heap writer
|
|
||||||
heapSource = switchToHeap(points.writer);
|
heapSource = switchToHeap(points.writer);
|
||||||
} else {
|
} else {
|
||||||
heapSource = (HeapPointWriter) points.writer;
|
heapSource = (HeapPointWriter) points.writer;
|
||||||
|
|
|
@ -124,8 +124,8 @@ public class BKDWriter implements Closeable {
|
||||||
|
|
||||||
protected final FixedBitSet docsSeen;
|
protected final FixedBitSet docsSeen;
|
||||||
|
|
||||||
private OfflinePointWriter offlinePointWriter;
|
private PointWriter pointWriter;
|
||||||
private HeapPointWriter heapPointWriter;
|
private boolean finished;
|
||||||
|
|
||||||
private IndexOutput tempInput;
|
private IndexOutput tempInput;
|
||||||
protected final int maxPointsInLeafNode;
|
protected final int maxPointsInLeafNode;
|
||||||
|
@ -180,9 +180,6 @@ public class BKDWriter implements Closeable {
|
||||||
throw new IllegalArgumentException("maxMBSortInHeap=" + maxMBSortInHeap + " only allows for maxPointsSortInHeap=" + maxPointsSortInHeap + ", but this is less than maxPointsInLeafNode=" + maxPointsInLeafNode + "; either increase maxMBSortInHeap or decrease maxPointsInLeafNode");
|
throw new IllegalArgumentException("maxMBSortInHeap=" + maxMBSortInHeap + " only allows for maxPointsSortInHeap=" + maxPointsSortInHeap + ", but this is less than maxPointsInLeafNode=" + maxPointsInLeafNode + "; either increase maxMBSortInHeap or decrease maxPointsInLeafNode");
|
||||||
}
|
}
|
||||||
|
|
||||||
// We write first maxPointsSortInHeap in heap, then cutover to offline for additional points:
|
|
||||||
heapPointWriter = new HeapPointWriter(maxPointsSortInHeap, packedBytesLength);
|
|
||||||
|
|
||||||
this.maxMBSortInHeap = maxMBSortInHeap;
|
this.maxMBSortInHeap = maxMBSortInHeap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -209,35 +206,27 @@ public class BKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** If the current segment has too many points then we spill over to temp files / offline sort. */
|
private void initPointWriter() throws IOException {
|
||||||
private void spillToOffline() throws IOException {
|
assert pointWriter == null : "Point writer is already initialized";
|
||||||
|
//total point count is an estimation but the final point count must be equal or lower to that number.
|
||||||
// For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
|
if (totalPointCount > maxPointsSortInHeap) {
|
||||||
offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, "spill", 0);
|
pointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, "spill", 0);
|
||||||
tempInput = offlinePointWriter.out;
|
tempInput = ((OfflinePointWriter)pointWriter).out;
|
||||||
for(int i=0;i<pointCount;i++) {
|
} else {
|
||||||
offlinePointWriter.append(heapPointWriter.getPackedValueSlice(i));
|
pointWriter = new HeapPointWriter(Math.toIntExact(totalPointCount), packedBytesLength);
|
||||||
}
|
}
|
||||||
heapPointWriter = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void add(byte[] packedValue, int docID) throws IOException {
|
public void add(byte[] packedValue, int docID) throws IOException {
|
||||||
if (packedValue.length != packedBytesLength) {
|
if (packedValue.length != packedBytesLength) {
|
||||||
throw new IllegalArgumentException("packedValue should be length=" + packedBytesLength + " (got: " + packedValue.length + ")");
|
throw new IllegalArgumentException("packedValue should be length=" + packedBytesLength + " (got: " + packedValue.length + ")");
|
||||||
}
|
}
|
||||||
|
if (pointCount >= totalPointCount) {
|
||||||
if (pointCount >= maxPointsSortInHeap) {
|
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + (pointCount + 1) + " values");
|
||||||
if (offlinePointWriter == null) {
|
|
||||||
spillToOffline();
|
|
||||||
}
|
|
||||||
offlinePointWriter.append(packedValue, docID);
|
|
||||||
} else {
|
|
||||||
// Not too many points added yet, continue using heap:
|
|
||||||
heapPointWriter.append(packedValue, docID);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: we could specialize for the 1D case:
|
|
||||||
if (pointCount == 0) {
|
if (pointCount == 0) {
|
||||||
|
initPointWriter();
|
||||||
System.arraycopy(packedValue, 0, minPackedValue, 0, packedIndexBytesLength);
|
System.arraycopy(packedValue, 0, minPackedValue, 0, packedIndexBytesLength);
|
||||||
System.arraycopy(packedValue, 0, maxPackedValue, 0, packedIndexBytesLength);
|
System.arraycopy(packedValue, 0, maxPackedValue, 0, packedIndexBytesLength);
|
||||||
} else {
|
} else {
|
||||||
|
@ -251,11 +240,8 @@ public class BKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pointWriter.append(packedValue, docID);
|
||||||
pointCount++;
|
pointCount++;
|
||||||
if (pointCount > totalPointCount) {
|
|
||||||
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
|
|
||||||
}
|
|
||||||
docsSeen.set(docID);
|
docsSeen.set(docID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -399,12 +385,12 @@ public class BKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Catch user silliness:
|
// Catch user silliness:
|
||||||
if (heapPointWriter == null && tempInput == null) {
|
if (finished == true) {
|
||||||
throw new IllegalStateException("already finished");
|
throw new IllegalStateException("already finished");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark that we already finished:
|
// Mark that we already finished:
|
||||||
heapPointWriter = null;
|
finished = true;
|
||||||
|
|
||||||
long countPerLeaf = pointCount = values.size();
|
long countPerLeaf = pointCount = values.size();
|
||||||
long innerNodeCount = 1;
|
long innerNodeCount = 1;
|
||||||
|
@ -542,12 +528,12 @@ public class BKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Catch user silliness:
|
// Catch user silliness:
|
||||||
if (heapPointWriter == null && tempInput == null) {
|
if (finished == true) {
|
||||||
throw new IllegalStateException("already finished");
|
throw new IllegalStateException("already finished");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mark that we already finished:
|
// Mark that we already finished:
|
||||||
heapPointWriter = null;
|
finished = true;
|
||||||
|
|
||||||
this.out = out;
|
this.out = out;
|
||||||
|
|
||||||
|
@ -740,7 +726,7 @@ public class BKDWriter implements Closeable {
|
||||||
// TODO: specialize the 1D case? it's much faster at indexing time (no partitioning on recurse...)
|
// TODO: specialize the 1D case? it's much faster at indexing time (no partitioning on recurse...)
|
||||||
|
|
||||||
// Catch user silliness:
|
// Catch user silliness:
|
||||||
if (heapPointWriter == null && tempInput == null) {
|
if (finished == true) {
|
||||||
throw new IllegalStateException("already finished");
|
throw new IllegalStateException("already finished");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -748,15 +734,15 @@ public class BKDWriter implements Closeable {
|
||||||
throw new IllegalStateException("must index at least one point");
|
throw new IllegalStateException("must index at least one point");
|
||||||
}
|
}
|
||||||
|
|
||||||
BKDRadixSelector.PathSlice points;
|
//mark as finished
|
||||||
if (offlinePointWriter != null) {
|
finished = true;
|
||||||
offlinePointWriter.close();
|
|
||||||
points = new BKDRadixSelector.PathSlice(offlinePointWriter, 0, pointCount);
|
pointWriter.close();
|
||||||
tempInput = null;
|
BKDRadixSelector.PathSlice points = new BKDRadixSelector.PathSlice(pointWriter, 0, pointCount);
|
||||||
} else {
|
//clean up pointers
|
||||||
points = new BKDRadixSelector.PathSlice(heapPointWriter, 0, pointCount);
|
tempInput = null;
|
||||||
heapPointWriter = null;
|
pointWriter = null;
|
||||||
}
|
|
||||||
|
|
||||||
long countPerLeaf = pointCount;
|
long countPerLeaf = pointCount;
|
||||||
long innerNodeCount = 1;
|
long innerNodeCount = 1;
|
||||||
|
@ -1145,6 +1131,7 @@ public class BKDWriter implements Closeable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
|
finished = true;
|
||||||
if (tempInput != null) {
|
if (tempInput != null) {
|
||||||
// NOTE: this should only happen on exception, e.g. caller calls close w/o calling finish:
|
// NOTE: this should only happen on exception, e.g. caller calls close w/o calling finish:
|
||||||
try {
|
try {
|
||||||
|
@ -1234,8 +1221,6 @@ public class BKDWriter implements Closeable {
|
||||||
/** Pull a partition back into heap once the point count is low enough while recursing. */
|
/** Pull a partition back into heap once the point count is low enough while recursing. */
|
||||||
private HeapPointWriter switchToHeap(PointWriter source) throws IOException {
|
private HeapPointWriter switchToHeap(PointWriter source) throws IOException {
|
||||||
int count = Math.toIntExact(source.count());
|
int count = Math.toIntExact(source.count());
|
||||||
// Not inside the try because we don't want to close it here:
|
|
||||||
|
|
||||||
try (PointReader reader = source.getReader(0, source.count());
|
try (PointReader reader = source.getReader(0, source.count());
|
||||||
HeapPointWriter writer = new HeapPointWriter(count, packedBytesLength)) {
|
HeapPointWriter writer = new HeapPointWriter(count, packedBytesLength)) {
|
||||||
for(int i=0;i<count;i++) {
|
for(int i=0;i<count;i++) {
|
||||||
|
@ -1408,8 +1393,7 @@ public class BKDWriter implements Closeable {
|
||||||
// least number of unique bytes at commonPrefixLengths[dim], which makes compression more efficient
|
// least number of unique bytes at commonPrefixLengths[dim], which makes compression more efficient
|
||||||
HeapPointWriter heapSource;
|
HeapPointWriter heapSource;
|
||||||
if (points.writer instanceof HeapPointWriter == false) {
|
if (points.writer instanceof HeapPointWriter == false) {
|
||||||
// Adversarial cases can cause this, e.g. very lopsided data, all equal points, such that we started
|
// Adversarial cases can cause this, e.g. merging big segments with most of the points deleted
|
||||||
// offline, but then kept splitting only in one dimension, and so never had to rewrite into heap writer
|
|
||||||
heapSource = switchToHeap(points.writer);
|
heapSource = switchToHeap(points.writer);
|
||||||
} else {
|
} else {
|
||||||
heapSource = (HeapPointWriter) points.writer;
|
heapSource = (HeapPointWriter) points.writer;
|
||||||
|
|
|
@ -667,8 +667,16 @@ public class TestBKD extends LuceneTestCase {
|
||||||
List<Long> toMerge = null;
|
List<Long> toMerge = null;
|
||||||
List<MergeState.DocMap> docMaps = null;
|
List<MergeState.DocMap> docMaps = null;
|
||||||
int seg = 0;
|
int seg = 0;
|
||||||
|
//we force sometimes to provide a bigger point count
|
||||||
BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode, maxMB, docValues.length);
|
long maxDocs = Long.MIN_VALUE;
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
maxDocs = docValues.length;
|
||||||
|
} else {
|
||||||
|
while (maxDocs < docValues.length) {
|
||||||
|
maxDocs = random().nextLong();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode, maxMB, maxDocs);
|
||||||
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
|
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
|
||||||
IndexInput in = null;
|
IndexInput in = null;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue