mirror of https://github.com/apache/lucene.git
Add test for float vector values in FlatVectorsScorer impls (#13851)
This is a test-only change that verifies the behaviour when float vector values are passed to our FlatVectorsScorer implementations. This would have caught the bug causing #13844, subsequently fixed by #13850.
This commit is contained in:
parent
4f6eb799bb
commit
25f49d4f86
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.internal.vectorization;
|
||||
|
||||
import static java.util.Locale.ROOT;
|
||||
import static org.apache.lucene.index.VectorSimilarityFunction.COSINE;
|
||||
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
|
||||
import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN;
|
||||
|
@ -24,6 +25,8 @@ import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRO
|
|||
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
@ -39,6 +42,7 @@ import java.util.stream.IntStream;
|
|||
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
|
||||
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
|
||||
import org.apache.lucene.codecs.lucene95.OffHeapByteVectorValues;
|
||||
import org.apache.lucene.codecs.lucene95.OffHeapFloatVectorValues;
|
||||
import org.apache.lucene.index.KnnVectorValues;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -329,12 +333,63 @@ public class TestVectorScorer extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// Tests that the FlatVectorsScorer handles float vectors correctly.
|
||||
public void testWithFloatValues() throws IOException {
|
||||
try (Directory dir = new MMapDirectory(createTempDir("testWithFloatValues"))) {
|
||||
final String fileName = "floatvalues";
|
||||
try (IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT)) {
|
||||
var vec = floatToByteArray(1f); // single vector, with one dimension
|
||||
out.writeBytes(vec, 0, vec.length);
|
||||
}
|
||||
|
||||
try (IndexInput in = dir.openInput(fileName, IOContext.DEFAULT)) {
|
||||
for (int times = 0; times < TIMES; times++) {
|
||||
for (var sim : List.of(COSINE, EUCLIDEAN, DOT_PRODUCT, MAXIMUM_INNER_PRODUCT)) {
|
||||
var vectorValues = floatVectorValues(1, 1, in, sim);
|
||||
assert vectorValues.getEncoding().byteSize == 4;
|
||||
|
||||
var supplier1 = DEFAULT_SCORER.getRandomVectorScorerSupplier(sim, vectorValues);
|
||||
var supplier2 = MEMSEG_SCORER.getRandomVectorScorerSupplier(sim, vectorValues);
|
||||
// these assertion assumes that the supplier and scorer's toString will have float
|
||||
// in it, since it's based on float vectors.
|
||||
assertTrue(supplier1.toString().toLowerCase(ROOT).contains("float"));
|
||||
assertTrue(supplier2.toString().toLowerCase(ROOT).contains("float"));
|
||||
assertTrue(supplier1.scorer(0).toString().toLowerCase(ROOT).contains("float"));
|
||||
assertTrue(supplier2.scorer(0).toString().toLowerCase(ROOT).contains("float"));
|
||||
float expected = supplier1.scorer(0).score(0);
|
||||
assertEquals(supplier2.scorer(0).score(0), expected, DELTA);
|
||||
|
||||
var scorer1 = DEFAULT_SCORER.getRandomVectorScorer(sim, vectorValues, new float[] {1f});
|
||||
var scorer2 = MEMSEG_SCORER.getRandomVectorScorer(sim, vectorValues, new float[] {1f});
|
||||
assertTrue(scorer1.toString().toLowerCase(ROOT).contains("float"));
|
||||
assertTrue(scorer2.toString().toLowerCase(ROOT).contains("float"));
|
||||
expected = scorer1.score(0);
|
||||
assertEquals(scorer2.score(0), expected, DELTA);
|
||||
|
||||
expectThrows(
|
||||
Throwable.class,
|
||||
() -> DEFAULT_SCORER.getRandomVectorScorer(sim, vectorValues, new byte[] {1}));
|
||||
expectThrows(
|
||||
Throwable.class,
|
||||
() -> MEMSEG_SCORER.getRandomVectorScorer(sim, vectorValues, new byte[] {1}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KnnVectorValues vectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim)
|
||||
throws IOException {
|
||||
return new OffHeapByteVectorValues.DenseOffHeapVectorValues(
|
||||
dims, size, in.slice("byteValues", 0, in.length()), dims, MEMSEG_SCORER, sim);
|
||||
}
|
||||
|
||||
KnnVectorValues floatVectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim)
|
||||
throws IOException {
|
||||
return new OffHeapFloatVectorValues.DenseOffHeapVectorValues(
|
||||
dims, size, in.slice("floatValues", 0, in.length()), dims, MEMSEG_SCORER, sim);
|
||||
}
|
||||
|
||||
// creates the vector based on the given ordinal, which is reproducible given the ord and dims
|
||||
static byte[] vector(int ord, int dims) {
|
||||
var random = new Random(Objects.hash(ord, dims));
|
||||
|
@ -355,6 +410,11 @@ public class TestVectorScorer extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/** Converts a float value to a byte array. */
|
||||
public static byte[] floatToByteArray(float value) {
|
||||
return ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putFloat(value).array();
|
||||
}
|
||||
|
||||
static int randomIntBetween(int minInclusive, int maxInclusive) {
|
||||
return RandomNumbers.randomIntBetween(random(), minInclusive, maxInclusive);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue