Enforce VectorValues.cost() is equal to size(). (#11962)

`VectorValues` has a `cost()` method that reports an approximate number of
documents that have a vector, but also a `size()` method that reports the
accurate number of vectors in the field. Since KNN vectors only support
single-valued fields, we should enforce that `cost()` returns `size()`.
This commit is contained in:
Adrien Grand 2022-11-23 11:05:00 +01:00 committed by GitHub
parent 469547e909
commit 802774641a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 9 additions and 68 deletions

View File

@ -123,6 +123,9 @@ API Changes
necessary and also illegal as it reported a number of dimensions equal to
zero. (Adrien Grand)
* GITHUB#11962: VectorValues#cost() now delegates to VectorValues#size().
(Adrien Grand)
New Features
---------------------
* GITHUB#11795: Add ByteWritesTrackingDirectoryWrapper to expose metrics for bytes merged, flushed, and overall

View File

@ -443,11 +443,6 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
return doc; return doc;
} }
@Override
public long cost() {
return ordToDoc.length;
}
@Override @Override
public RandomAccessVectorValues copy() { public RandomAccessVectorValues copy() {
return new OffHeapVectorValues(dimension, ordToDoc, dataIn.clone()); return new OffHeapVectorValues(dimension, ordToDoc, dataIn.clone());

View File

@ -495,11 +495,6 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
return doc; return doc;
} }
@Override
public long cost() {
return size;
}
@Override @Override
public RandomAccessVectorValues copy() { public RandomAccessVectorValues copy() {
return new OffHeapVectorValues(dimension, size, ordToDoc, dataIn.clone()); return new OffHeapVectorValues(dimension, size, ordToDoc, dataIn.clone());

View File

@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return size; return size;
} }
@Override
public long cost() {
return size;
}
@Override @Override
public float[] vectorValue(int targetOrd) throws IOException { public float[] vectorValue(int targetOrd) throws IOException {
slice.seek((long) targetOrd * byteSize); slice.seek((long) targetOrd * byteSize);
@ -286,11 +281,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return doc = NO_MORE_DOCS; return doc = NO_MORE_DOCS;
} }
@Override
public long cost() {
return 0;
}
@Override @Override
public RandomAccessVectorValues copy() throws IOException { public RandomAccessVectorValues copy() throws IOException {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();

View File

@ -333,11 +333,6 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
return slowAdvance(target); return slowAdvance(target);
} }
@Override
public long cost() {
return size();
}
private void readAllVectors() throws IOException { private void readAllVectors() throws IOException {
for (float[] value : values) { for (float[] value : values) {
readVector(value); readVector(value);

View File

@ -179,11 +179,6 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
return size; return size;
} }
@Override
public long cost() {
return size;
}
@Override @Override
public int dimension() { public int dimension() {
return subs.get(0).values.dimension(); return subs.get(0).values.dimension();

View File

@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return size; return size;
} }
@Override
public long cost() {
return size;
}
@Override @Override
public float[] vectorValue(int targetOrd) throws IOException { public float[] vectorValue(int targetOrd) throws IOException {
slice.seek((long) targetOrd * byteSize); slice.seek((long) targetOrd * byteSize);
@ -295,11 +290,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return doc = NO_MORE_DOCS; return doc = NO_MORE_DOCS;
} }
@Override
public long cost() {
return 0;
}
@Override @Override
public RandomAccessVectorValues copy() throws IOException { public RandomAccessVectorValues copy() throws IOException {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();

View File

@ -280,10 +280,5 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
public int advance(int target) { public int advance(int target) {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
@Override
public long cost() {
return docsWithFieldIter.cost();
}
} }
} }

View File

@ -48,11 +48,6 @@ public abstract class FilterVectorValues extends VectorValues {
return in.advance(target); return in.advance(target);
} }
@Override
public long cost() {
return in.cost();
}
@Override @Override
public int dimension() { public int dimension() {
return in.dimension(); return in.dimension();

View File

@ -39,14 +39,17 @@ public abstract class VectorValues extends DocIdSetIterator {
public abstract int dimension(); public abstract int dimension();
/** /**
* TODO: should we use cost() for this? We rely on its always being exactly the number of * Return the number of vectors for this field.
* documents having a value for this field, which is not guaranteed by the cost() contract, but in
* all the implementations so far they are the same.
* *
* @return the number of vectors returned by this iterator * @return the number of vectors returned by this iterator
*/ */
public abstract int size(); public abstract int size();
@Override
public final long cost() {
return size();
}
/** /**
* Return the vector value for the current document ID. It is illegal to call this method when the * Return the vector value for the current document ID. It is illegal to call this method when the
* iterator is not positioned: before advancing, or after failing to advance. The returned array * iterator is not positioned: before advancing, or after failing to advance. The returned array
@ -127,10 +130,5 @@ public abstract class VectorValues extends DocIdSetIterator {
public int advance(int target) throws IOException { public int advance(int target) throws IOException {
throw new UnsupportedOperationException(); throw new UnsupportedOperationException();
} }
@Override
public long cost() {
return size();
}
} }
} }

View File

@ -122,9 +122,4 @@ class MockVectorValues extends VectorValues implements RandomAccessVectorValues
} }
return NO_MORE_DOCS; return NO_MORE_DOCS;
} }
@Override
public long cost() {
return size();
}
} }

View File

@ -767,11 +767,6 @@ public class TestHnswGraph extends LuceneTestCase {
return doc; return doc;
} }
@Override
public long cost() {
return size;
}
@Override @Override
public float[] vectorValue(int ord) { public float[] vectorValue(int ord) {
return unitVector2d(ord / (double) size, value); return unitVector2d(ord / (double) size, value);