Enforce VectorValues.cost() is equal to size(). (#11962)

`VectorValues` has a `cost()` method that reports an approximate number of
documents that have a vector, but also a `size()` method that reports the
accurate number of vectors in the field. Since KNN vectors only support
single-valued fields, we should enforce that `cost()` returns `size()`.
This commit is contained in:
Adrien Grand 2022-11-23 11:05:00 +01:00 committed by GitHub
parent 469547e909
commit 802774641a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 9 additions and 68 deletions

View File

@ -123,6 +123,9 @@ API Changes
necessary and also illegal as it reported a number of dimensions equal to
zero. (Adrien Grand)
* GITHUB#11962: VectorValues#cost() now delegates to VectorValues#size().
(Adrien Grand)
New Features
---------------------
* GITHUB#11795: Add ByteWritesTrackingDirectoryWrapper to expose metrics for bytes merged, flushed, and overall

View File

@ -443,11 +443,6 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
return doc;
}
@Override
public long cost() {
return ordToDoc.length;
}
@Override
public RandomAccessVectorValues copy() {
return new OffHeapVectorValues(dimension, ordToDoc, dataIn.clone());

View File

@ -495,11 +495,6 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
return doc;
}
@Override
public long cost() {
return size;
}
@Override
public RandomAccessVectorValues copy() {
return new OffHeapVectorValues(dimension, size, ordToDoc, dataIn.clone());

View File

@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return size;
}
@Override
public long cost() {
return size;
}
@Override
public float[] vectorValue(int targetOrd) throws IOException {
slice.seek((long) targetOrd * byteSize);
@ -286,11 +281,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return doc = NO_MORE_DOCS;
}
@Override
public long cost() {
return 0;
}
@Override
public RandomAccessVectorValues copy() throws IOException {
throw new UnsupportedOperationException();

View File

@ -333,11 +333,6 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
return slowAdvance(target);
}
@Override
public long cost() {
return size();
}
private void readAllVectors() throws IOException {
for (float[] value : values) {
readVector(value);

View File

@ -179,11 +179,6 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
return size;
}
@Override
public long cost() {
return size;
}
@Override
public int dimension() {
return subs.get(0).values.dimension();

View File

@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return size;
}
@Override
public long cost() {
return size;
}
@Override
public float[] vectorValue(int targetOrd) throws IOException {
slice.seek((long) targetOrd * byteSize);
@ -295,11 +290,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return doc = NO_MORE_DOCS;
}
@Override
public long cost() {
return 0;
}
@Override
public RandomAccessVectorValues copy() throws IOException {
throw new UnsupportedOperationException();

View File

@ -280,10 +280,5 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
public int advance(int target) {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
return docsWithFieldIter.cost();
}
}
}

View File

@ -48,11 +48,6 @@ public abstract class FilterVectorValues extends VectorValues {
return in.advance(target);
}
@Override
public long cost() {
return in.cost();
}
@Override
public int dimension() {
return in.dimension();

View File

@ -39,14 +39,17 @@ public abstract class VectorValues extends DocIdSetIterator {
public abstract int dimension();
/**
* TODO: should we use cost() for this? We rely on its always being exactly the number of
* documents having a value for this field, which is not guaranteed by the cost() contract, but in
* all the implementations so far they are the same.
* Return the number of vectors for this field.
*
* @return the number of vectors returned by this iterator
*/
public abstract int size();
@Override
public final long cost() {
return size();
}
/**
* Return the vector value for the current document ID. It is illegal to call this method when the
* iterator is not positioned: before advancing, or after failing to advance. The returned array
@ -127,10 +130,5 @@ public abstract class VectorValues extends DocIdSetIterator {
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
return size();
}
}
}

View File

@ -122,9 +122,4 @@ class MockVectorValues extends VectorValues implements RandomAccessVectorValues
}
return NO_MORE_DOCS;
}
@Override
public long cost() {
return size();
}
}

View File

@ -767,11 +767,6 @@ public class TestHnswGraph extends LuceneTestCase {
return doc;
}
@Override
public long cost() {
return size;
}
@Override
public float[] vectorValue(int ord) {
return unitVector2d(ord / (double) size, value);