mirror of https://github.com/apache/lucene.git
fix int overflow bug in BKDWriter that prevented it from indexing > 2.1B points; try to improve runtime of Test2BPoints
This commit is contained in:
parent
fafbb2b6c7
commit
1e05d3be76
|
@ -1082,7 +1082,7 @@ public class BKDWriter implements Closeable {
|
||||||
|
|
||||||
// Second pass: write the full values:
|
// Second pass: write the full values:
|
||||||
byte[] lastPackedValue = new byte[bytesPerDim];
|
byte[] lastPackedValue = new byte[bytesPerDim];
|
||||||
for (int i=0;i<source.count;i++) {
|
for (int i=0;i<count;i++) {
|
||||||
// TODO: we could do bulk copying here, avoiding the intermediate copy:
|
// TODO: we could do bulk copying here, avoiding the intermediate copy:
|
||||||
heapSource.readPackedValue(Math.toIntExact(source.start + i), scratchPackedValue);
|
heapSource.readPackedValue(Math.toIntExact(source.start + i), scratchPackedValue);
|
||||||
assert numDims != 1 || valueInOrder(i, lastPackedValue, scratchPackedValue);
|
assert numDims != 1 || valueInOrder(i, lastPackedValue, scratchPackedValue);
|
||||||
|
@ -1143,7 +1143,7 @@ public class BKDWriter implements Closeable {
|
||||||
|
|
||||||
// Partition this source according to how the splitDim split the values:
|
// Partition this source according to how the splitDim split the values:
|
||||||
int nextRightCount = 0;
|
int nextRightCount = 0;
|
||||||
for (int i=0;i<source.count;i++) {
|
for (long i=0;i<source.count;i++) {
|
||||||
boolean result = reader.next();
|
boolean result = reader.next();
|
||||||
assert result;
|
assert result;
|
||||||
byte[] packedValue = reader.packedValue();
|
byte[] packedValue = reader.packedValue();
|
||||||
|
|
|
@ -16,8 +16,16 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.index;
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
import org.apache.lucene.codecs.FilterCodec;
|
||||||
|
import org.apache.lucene.codecs.PointsFormat;
|
||||||
|
import org.apache.lucene.codecs.PointsReader;
|
||||||
|
import org.apache.lucene.codecs.PointsWriter;
|
||||||
|
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
|
||||||
|
import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.LongPoint;
|
import org.apache.lucene.document.LongPoint;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
@ -33,10 +41,10 @@ import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
||||||
|
|
||||||
// e.g. run like this: ant test -Dtestcase=Test2BPoints -Dtests.nightly=true -Dtests.verbose=true -Dtests.monster=true
|
// e.g. run like this: ant test -Dtestcase=Test2BPoints -Dtests.nightly=true -Dtests.verbose=true -Dtests.monster=true
|
||||||
//
|
//
|
||||||
// or: python -u /l/util/src/python/repeatLuceneTest.py -once -nolog -tmpDir /b/tmp -logDir /l/logs Test2BPoints.test1D -verbose
|
// or: python -u /l/util/src/python/repeatLuceneTest.py -heap 6g -once -nolog -tmpDir /b/tmp -logDir /l/logs Test2BPoints.test2D -verbose
|
||||||
|
|
||||||
@SuppressCodecs({ "SimpleText", "Memory", "Direct", "Compressing" })
|
@SuppressCodecs({ "SimpleText", "Memory", "Direct", "Compressing" })
|
||||||
@TimeoutSuite(millis = 16 * TimeUnits.HOUR)
|
@TimeoutSuite(millis = 365 * 24 * TimeUnits.HOUR) // hopefully ~1 year is long enough ;)
|
||||||
@Monster("takes at least 4 hours and consumes many GB of temp disk space")
|
@Monster("takes at least 4 hours and consumes many GB of temp disk space")
|
||||||
public class Test2BPoints extends LuceneTestCase {
|
public class Test2BPoints extends LuceneTestCase {
|
||||||
public void test1D() throws Exception {
|
public void test1D() throws Exception {
|
||||||
|
@ -44,13 +52,15 @@ public class Test2BPoints extends LuceneTestCase {
|
||||||
System.out.println("DIR: " + ((FSDirectory) dir).getDirectory());
|
System.out.println("DIR: " + ((FSDirectory) dir).getDirectory());
|
||||||
|
|
||||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()))
|
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()))
|
||||||
.setCodec(Codec.forName("Lucene60"))
|
.setCodec(getCodec())
|
||||||
.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
|
.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
|
||||||
.setRAMBufferSizeMB(64.0)
|
.setRAMBufferSizeMB(256.0)
|
||||||
.setMergeScheduler(new ConcurrentMergeScheduler())
|
.setMergeScheduler(new ConcurrentMergeScheduler())
|
||||||
.setMergePolicy(newLogMergePolicy(false, 10))
|
.setMergePolicy(newLogMergePolicy(false, 10))
|
||||||
.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
|
.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
|
||||||
|
|
||||||
|
((ConcurrentMergeScheduler) iwc.getMergeScheduler()).setMaxMergesAndThreads(6, 3);
|
||||||
|
|
||||||
IndexWriter w = new IndexWriter(dir, iwc);
|
IndexWriter w = new IndexWriter(dir, iwc);
|
||||||
|
|
||||||
MergePolicy mp = w.getConfig().getMergePolicy();
|
MergePolicy mp = w.getConfig().getMergePolicy();
|
||||||
|
@ -88,13 +98,15 @@ public class Test2BPoints extends LuceneTestCase {
|
||||||
Directory dir = FSDirectory.open(createTempDir("2BPoints2D"));
|
Directory dir = FSDirectory.open(createTempDir("2BPoints2D"));
|
||||||
|
|
||||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()))
|
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()))
|
||||||
.setCodec(Codec.forName("Lucene60"))
|
.setCodec(getCodec())
|
||||||
.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
|
.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
|
||||||
.setRAMBufferSizeMB(64.0)
|
.setRAMBufferSizeMB(256.0)
|
||||||
.setMergeScheduler(new ConcurrentMergeScheduler())
|
.setMergeScheduler(new ConcurrentMergeScheduler())
|
||||||
.setMergePolicy(newLogMergePolicy(false, 10))
|
.setMergePolicy(newLogMergePolicy(false, 10))
|
||||||
.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
|
.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
|
||||||
|
|
||||||
|
((ConcurrentMergeScheduler) iwc.getMergeScheduler()).setMaxMergesAndThreads(6, 3);
|
||||||
|
|
||||||
IndexWriter w = new IndexWriter(dir, iwc);
|
IndexWriter w = new IndexWriter(dir, iwc);
|
||||||
|
|
||||||
MergePolicy mp = w.getConfig().getMergePolicy();
|
MergePolicy mp = w.getConfig().getMergePolicy();
|
||||||
|
@ -127,4 +139,26 @@ public class Test2BPoints extends LuceneTestCase {
|
||||||
TestUtil.checkIndex(dir);
|
TestUtil.checkIndex(dir);
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Codec getCodec() {
|
||||||
|
|
||||||
|
return new FilterCodec("Lucene60", Codec.forName("Lucene60")) {
|
||||||
|
@Override
|
||||||
|
public PointsFormat pointsFormat() {
|
||||||
|
return new PointsFormat() {
|
||||||
|
@Override
|
||||||
|
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
|
||||||
|
int maxPointsInLeafNode = 1024;
|
||||||
|
double maxMBSortInHeap = 256.0;
|
||||||
|
return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
|
||||||
|
return new Lucene60PointsReader(readState);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue