From a18b62ded49f1b091de7029716d6f63c06a36fc0 Mon Sep 17 00:00:00 2001 From: Jack Conradson Date: Wed, 16 Nov 2022 20:52:32 -0800 Subject: [PATCH] Decrease test time for TestManyKnnDocs.testLargeSegment (#11945) * Improve speed of TestManyKnnDocs --- .../lucene/document/TestManyKnnDocs.java | 9 ++-- .../apache/lucene/spatial3d/geom/Plane.java | 6 +-- .../test-framework/src/java/module-info.java | 4 +- .../codecs/vector/ConfigurableMCodec.java | 47 +++++++++++++++++++ .../tests/codecs/vector/package-info.java | 19 ++++++++ .../services/org.apache.lucene.codecs.Codec | 1 + 6 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/ConfigurableMCodec.java create mode 100644 lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/package-info.java diff --git a/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java b/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java index bc3a249ffa0..5eb419fef8f 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestManyKnnDocs.java @@ -23,19 +23,20 @@ import org.apache.lucene.search.KnnVectorQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.tests.codecs.vector.ConfigurableMCodec; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.LuceneTestCase.Monster; -import org.apache.lucene.tests.util.TestUtil; @TimeoutSuite(millis = 86_400_000) // 24 hour timeout -@Monster("takes ~2 hours and needs extra heap, disk space, file handles") +@Monster("takes ~10 minutes and needs extra heap, disk space, file handles") public class TestManyKnnDocs extends LuceneTestCase { // gradlew -p lucene/core test --tests TestManyKnnDocs -Ptests.heapsize=16g -Dtests.monster=true public void testLargeSegment() throws Exception { IndexWriterConfig iwc = new IndexWriterConfig(); iwc.setCodec( - TestUtil.getDefaultCodec()); // Make sure to use the default codec instead of a random one + new ConfigurableMCodec( + 128)); // Make sure to use the ConfigurableMCodec instead of a random one iwc.setRAMBufferSizeMB(64); // Use a 64MB buffer to create larger initial segments TieredMergePolicy mp = new TieredMergePolicy(); mp.setMaxMergeAtOnce(256); // avoid intermediate merges (waste of time with HNSW?) @@ -47,7 +48,7 @@ public class TestManyKnnDocs extends LuceneTestCase { try (Directory dir = FSDirectory.open(createTempDir("ManyKnnVectorDocs")); IndexWriter iw = new IndexWriter(dir, iwc)) { - int numVectors = 16268816; + int numVectors = 2088992; float[] vector = new float[1]; Document doc = new Document(); doc.add(new KnnVectorField(fieldName, vector, similarityFunction)); diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Plane.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Plane.java index 9b46c3553bf..80a66d476a1 100755 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Plane.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/geom/Plane.java @@ -1500,8 +1500,7 @@ public class Plane extends Vector { } else { // Since a==b==0, any plane including the Z axis suffices. // System.err.println(" Perpendicular to z"); - GeoPoint[] points = - findIntersections(planetModel, normalYPlane, NO_BOUNDS, NO_BOUNDS); + GeoPoint[] points = findIntersections(planetModel, normalYPlane, NO_BOUNDS, NO_BOUNDS); if (points.length == 0) { points = findIntersections(planetModel, normalXPlane, NO_BOUNDS, NO_BOUNDS); } @@ -2047,8 +2046,7 @@ public class Plane extends Vector { } } else { // Horizontal circle. Since a==b, any vertical plane suffices. - GeoPoint[] points = - findIntersections(planetModel, normalXPlane, NO_BOUNDS, NO_BOUNDS); + GeoPoint[] points = findIntersections(planetModel, normalXPlane, NO_BOUNDS, NO_BOUNDS); if (points.length == 0) { points = findIntersections(planetModel, normalYPlane, NO_BOUNDS, NO_BOUNDS); } diff --git a/lucene/test-framework/src/java/module-info.java b/lucene/test-framework/src/java/module-info.java index 893d57189bb..f366d1f52b7 100644 --- a/lucene/test-framework/src/java/module-info.java +++ b/lucene/test-framework/src/java/module-info.java @@ -40,6 +40,7 @@ module org.apache.lucene.test_framework { exports org.apache.lucene.tests.codecs.ramonly; exports org.apache.lucene.tests.codecs.uniformsplit.sharedterms; exports org.apache.lucene.tests.codecs.uniformsplit; + exports org.apache.lucene.tests.codecs.vector; exports org.apache.lucene.tests.geo; exports org.apache.lucene.tests.index; exports org.apache.lucene.tests.mockfile; @@ -58,7 +59,8 @@ module org.apache.lucene.test_framework { org.apache.lucene.tests.codecs.compressing.FastDecompressionCompressingCodec, org.apache.lucene.tests.codecs.compressing.HighCompressionCompressingCodec, org.apache.lucene.tests.codecs.compressing.LZ4WithPresetCompressingCodec, - org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec; + org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec, + org.apache.lucene.tests.codecs.vector.ConfigurableMCodec; provides org.apache.lucene.codecs.DocValuesFormat with org.apache.lucene.tests.codecs.asserting.AssertingDocValuesFormat; provides org.apache.lucene.codecs.KnnVectorsFormat with diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/ConfigurableMCodec.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/ConfigurableMCodec.java new file mode 100644 index 00000000000..89d0e6a970d --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/ConfigurableMCodec.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.tests.codecs.vector; + +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.lucene94.Lucene94Codec; +import org.apache.lucene.codecs.lucene94.Lucene94HnswVectorsFormat; + +/** + * This codec allows customization of the number of connections made for an hnsw index. Increasing + * the number of connections can decrease the time of certain tests while still achieving the same + * test coverage. + */ +public class ConfigurableMCodec extends FilterCodec { + + private final KnnVectorsFormat knnVectorsFormat; + + public ConfigurableMCodec() { + super("ConfigurableMCodec", new Lucene94Codec()); + knnVectorsFormat = new Lucene94HnswVectorsFormat(128, 100); + } + + public ConfigurableMCodec(int maxConn) { + super("ConfigurableMCodec", new Lucene94Codec()); + knnVectorsFormat = new Lucene94HnswVectorsFormat(maxConn, 100); + } + + @Override + public KnnVectorsFormat knnVectorsFormat() { + return knnVectorsFormat; + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/package-info.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/package-info.java new file mode 100644 index 00000000000..6c1ab143cc9 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/vector/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Codecs for testing different vector formats. */ +package org.apache.lucene.tests.codecs.vector; diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec index cdc3aeca2a9..7b75d2bd4d3 100644 --- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -21,3 +21,4 @@ org.apache.lucene.tests.codecs.compressing.FastDecompressionCompressingCodec org.apache.lucene.tests.codecs.compressing.HighCompressionCompressingCodec org.apache.lucene.tests.codecs.compressing.LZ4WithPresetCompressingCodec org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec +org.apache.lucene.tests.codecs.vector.ConfigurableMCodec