mirror of https://github.com/apache/lucene.git
Enforce VectorValues.cost() is equal to size(). (#11962)
`VectorValues` has a `cost()` method that reports an approximate number of documents that have a vector, but also a `size()` method that reports the exact number of vectors in the field. Since KNN vectors only support single-valued fields, we should enforce that `cost()` returns the same value as `size()`.
This commit is contained in:
parent
469547e909
commit
802774641a
|
@ -123,6 +123,9 @@ API Changes
|
|||
necessary and also illegal as it reported a number of dimensions equal to
|
||||
zero. (Adrien Grand)
|
||||
|
||||
* GITHUB#11962: VectorValues#cost() now delegates to VectorValues#size().
|
||||
(Adrien Grand)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
* GITHUB#11795: Add ByteWritesTrackingDirectoryWrapper to expose metrics for bytes merged, flushed, and overall
|
||||
|
|
|
@ -443,11 +443,6 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
|
|||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return ordToDoc.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public RandomAccessVectorValues copy() {
|
||||
return new OffHeapVectorValues(dimension, ordToDoc, dataIn.clone());
|
||||
|
|
|
@ -495,11 +495,6 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
|||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public RandomAccessVectorValues copy() {
|
||||
return new OffHeapVectorValues(dimension, size, ordToDoc, dataIn.clone());
|
||||
|
|
|
@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
|
|||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] vectorValue(int targetOrd) throws IOException {
|
||||
slice.seek((long) targetOrd * byteSize);
|
||||
|
@ -286,11 +281,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
|
|||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public RandomAccessVectorValues copy() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
|
@ -333,11 +333,6 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
|
|||
return slowAdvance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return size();
|
||||
}
|
||||
|
||||
private void readAllVectors() throws IOException {
|
||||
for (float[] value : values) {
|
||||
readVector(value);
|
||||
|
|
|
@ -179,11 +179,6 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
|
|||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int dimension() {
|
||||
return subs.get(0).values.dimension();
|
||||
|
|
|
@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
|
|||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] vectorValue(int targetOrd) throws IOException {
|
||||
slice.seek((long) targetOrd * byteSize);
|
||||
|
@ -295,11 +290,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
|
|||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public RandomAccessVectorValues copy() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
|
|
|
@ -280,10 +280,5 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
|
|||
public int advance(int target) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return docsWithFieldIter.cost();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,11 +48,6 @@ public abstract class FilterVectorValues extends VectorValues {
|
|||
return in.advance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return in.cost();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int dimension() {
|
||||
return in.dimension();
|
||||
|
|
|
@ -39,14 +39,17 @@ public abstract class VectorValues extends DocIdSetIterator {
|
|||
public abstract int dimension();
|
||||
|
||||
/**
|
||||
* TODO: should we use cost() for this? We rely on its always being exactly the number of
|
||||
* documents having a value for this field, which is not guaranteed by the cost() contract, but in
|
||||
* all the implementations so far they are the same.
|
||||
* Return the number of vectors for this field.
|
||||
*
|
||||
* @return the number of vectors returned by this iterator
|
||||
*/
|
||||
public abstract int size();
|
||||
|
||||
@Override
|
||||
public final long cost() {
|
||||
return size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the vector value for the current document ID. It is illegal to call this method when the
|
||||
* iterator is not positioned: before advancing, or after failing to advance. The returned array
|
||||
|
@ -127,10 +130,5 @@ public abstract class VectorValues extends DocIdSetIterator {
|
|||
public int advance(int target) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return size();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,9 +122,4 @@ class MockVectorValues extends VectorValues implements RandomAccessVectorValues
|
|||
}
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return size();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -767,11 +767,6 @@ public class TestHnswGraph extends LuceneTestCase {
|
|||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] vectorValue(int ord) {
|
||||
return unitVector2d(ord / (double) size, value);
|
||||
|
|
Loading…
Reference in New Issue