Decrease test time for TestManyKnnDocs.testLargeSegment (#11945)

* Improve speed of TestManyKnnDocs
This commit is contained in:
Jack Conradson 2022-11-16 20:52:32 -08:00 committed by GitHub
parent b6ebfd1861
commit a18b62ded4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 77 additions and 9 deletions

View File

@ -23,19 +23,20 @@ import org.apache.lucene.search.KnnVectorQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.tests.codecs.vector.ConfigurableMCodec;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.LuceneTestCase.Monster;
import org.apache.lucene.tests.util.TestUtil;
@TimeoutSuite(millis = 86_400_000) // 24 hour timeout
@Monster("takes ~2 hours and needs extra heap, disk space, file handles")
@Monster("takes ~10 minutes and needs extra heap, disk space, file handles")
public class TestManyKnnDocs extends LuceneTestCase {
// gradlew -p lucene/core test --tests TestManyKnnDocs -Ptests.heapsize=16g -Dtests.monster=true
public void testLargeSegment() throws Exception {
IndexWriterConfig iwc = new IndexWriterConfig();
iwc.setCodec(
TestUtil.getDefaultCodec()); // Make sure to use the default codec instead of a random one
new ConfigurableMCodec(
128)); // Make sure to use the ConfigurableMCodec instead of a random one
iwc.setRAMBufferSizeMB(64); // Use a 64MB buffer to create larger initial segments
TieredMergePolicy mp = new TieredMergePolicy();
mp.setMaxMergeAtOnce(256); // avoid intermediate merges (waste of time with HNSW?)
@ -47,7 +48,7 @@ public class TestManyKnnDocs extends LuceneTestCase {
try (Directory dir = FSDirectory.open(createTempDir("ManyKnnVectorDocs"));
IndexWriter iw = new IndexWriter(dir, iwc)) {
int numVectors = 16268816;
int numVectors = 2088992;
float[] vector = new float[1];
Document doc = new Document();
doc.add(new KnnVectorField(fieldName, vector, similarityFunction));

View File

@ -1500,8 +1500,7 @@ public class Plane extends Vector {
} else {
// Since a==b==0, any plane including the Z axis suffices.
// System.err.println(" Perpendicular to z");
GeoPoint[] points =
findIntersections(planetModel, normalYPlane, NO_BOUNDS, NO_BOUNDS);
GeoPoint[] points = findIntersections(planetModel, normalYPlane, NO_BOUNDS, NO_BOUNDS);
if (points.length == 0) {
points = findIntersections(planetModel, normalXPlane, NO_BOUNDS, NO_BOUNDS);
}
@ -2047,8 +2046,7 @@ public class Plane extends Vector {
}
} else {
// Horizontal circle. Since a==b, any vertical plane suffices.
GeoPoint[] points =
findIntersections(planetModel, normalXPlane, NO_BOUNDS, NO_BOUNDS);
GeoPoint[] points = findIntersections(planetModel, normalXPlane, NO_BOUNDS, NO_BOUNDS);
if (points.length == 0) {
points = findIntersections(planetModel, normalYPlane, NO_BOUNDS, NO_BOUNDS);
}

View File

@ -40,6 +40,7 @@ module org.apache.lucene.test_framework {
exports org.apache.lucene.tests.codecs.ramonly;
exports org.apache.lucene.tests.codecs.uniformsplit.sharedterms;
exports org.apache.lucene.tests.codecs.uniformsplit;
exports org.apache.lucene.tests.codecs.vector;
exports org.apache.lucene.tests.geo;
exports org.apache.lucene.tests.index;
exports org.apache.lucene.tests.mockfile;
@ -58,7 +59,8 @@ module org.apache.lucene.test_framework {
org.apache.lucene.tests.codecs.compressing.FastDecompressionCompressingCodec,
org.apache.lucene.tests.codecs.compressing.HighCompressionCompressingCodec,
org.apache.lucene.tests.codecs.compressing.LZ4WithPresetCompressingCodec,
org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec;
org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec,
org.apache.lucene.tests.codecs.vector.ConfigurableMCodec;
provides org.apache.lucene.codecs.DocValuesFormat with
org.apache.lucene.tests.codecs.asserting.AssertingDocValuesFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.tests.codecs.vector;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene94.Lucene94Codec;
import org.apache.lucene.codecs.lucene94.Lucene94HnswVectorsFormat;
/**
 * Test-only codec that wraps {@link Lucene94Codec} and swaps in an HNSW vectors format whose
 * maximum number of connections per node is configurable. Increasing the number of connections
 * can decrease the time of certain tests while still achieving the same test coverage.
 */
public class ConfigurableMCodec extends FilterCodec {

  /** Maximum connections used when the codec is created through the no-arg constructor. */
  private static final int DEFAULT_MAX_CONN = 128;

  /** Second argument handed to {@link Lucene94HnswVectorsFormat}; the same for every instance. */
  private static final int BEAM_WIDTH = 100;

  /** Vectors format returned in place of the delegate codec's own format. */
  private final KnnVectorsFormat vectorsFormat;

  /** Creates the codec with the default of 128 maximum connections. */
  public ConfigurableMCodec() {
    this(DEFAULT_MAX_CONN);
  }

  /**
   * Creates the codec with a caller-chosen connection limit.
   *
   * @param maxConn value passed to {@link Lucene94HnswVectorsFormat} as its maximum number of
   *     connections
   */
  public ConfigurableMCodec(int maxConn) {
    super("ConfigurableMCodec", new Lucene94Codec());
    this.vectorsFormat = new Lucene94HnswVectorsFormat(maxConn, BEAM_WIDTH);
  }

  /** Returns the HNSW vectors format configured at construction time. */
  @Override
  public KnnVectorsFormat knnVectorsFormat() {
    return this.vectorsFormat;
  }
}

View File

@ -0,0 +1,19 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Codecs for testing different vector formats. */
package org.apache.lucene.tests.codecs.vector;

View File

@ -21,3 +21,4 @@ org.apache.lucene.tests.codecs.compressing.FastDecompressionCompressingCodec
org.apache.lucene.tests.codecs.compressing.HighCompressionCompressingCodec
org.apache.lucene.tests.codecs.compressing.LZ4WithPresetCompressingCodec
org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec
org.apache.lucene.tests.codecs.vector.ConfigurableMCodec