mirror of https://github.com/apache/lucene.git
Enforce VectorValues.cost() is equal to size(). (#11962)
`VectorValues` has a `cost()` method that reports an approximate number of documents that have a vector, but also a `size()` method that reports the exact number of vectors in the field. Since KNN vectors only support single-valued fields, every document with a value contributes exactly one vector, so the two numbers always coincide and we should enforce that `cost()` returns `size()`.
This commit is contained in:
parent
469547e909
commit
802774641a
|
@ -123,6 +123,9 @@ API Changes
|
||||||
necessary and also illegal as it reported a number of dimensions equal to
|
necessary and also illegal as it reported a number of dimensions equal to
|
||||||
zero. (Adrien Grand)
|
zero. (Adrien Grand)
|
||||||
|
|
||||||
|
* GITHUB#11962: VectorValues#cost() now delegates to VectorValues#size().
|
||||||
|
(Adrien Grand)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
---------------------
|
---------------------
|
||||||
* GITHUB#11795: Add ByteWritesTrackingDirectoryWrapper to expose metrics for bytes merged, flushed, and overall
|
* GITHUB#11795: Add ByteWritesTrackingDirectoryWrapper to expose metrics for bytes merged, flushed, and overall
|
||||||
|
|
|
@ -443,11 +443,6 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return ordToDoc.length;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RandomAccessVectorValues copy() {
|
public RandomAccessVectorValues copy() {
|
||||||
return new OffHeapVectorValues(dimension, ordToDoc, dataIn.clone());
|
return new OffHeapVectorValues(dimension, ordToDoc, dataIn.clone());
|
||||||
|
|
|
@ -495,11 +495,6 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RandomAccessVectorValues copy() {
|
public RandomAccessVectorValues copy() {
|
||||||
return new OffHeapVectorValues(dimension, size, ordToDoc, dataIn.clone());
|
return new OffHeapVectorValues(dimension, size, ordToDoc, dataIn.clone());
|
||||||
|
|
|
@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float[] vectorValue(int targetOrd) throws IOException {
|
public float[] vectorValue(int targetOrd) throws IOException {
|
||||||
slice.seek((long) targetOrd * byteSize);
|
slice.seek((long) targetOrd * byteSize);
|
||||||
|
@ -286,11 +281,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
|
||||||
return doc = NO_MORE_DOCS;
|
return doc = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RandomAccessVectorValues copy() throws IOException {
|
public RandomAccessVectorValues copy() throws IOException {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
|
|
|
@ -333,11 +333,6 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
|
||||||
return slowAdvance(target);
|
return slowAdvance(target);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return size();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void readAllVectors() throws IOException {
|
private void readAllVectors() throws IOException {
|
||||||
for (float[] value : values) {
|
for (float[] value : values) {
|
||||||
readVector(value);
|
readVector(value);
|
||||||
|
|
|
@ -179,11 +179,6 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int dimension() {
|
public int dimension() {
|
||||||
return subs.get(0).values.dimension();
|
return subs.get(0).values.dimension();
|
||||||
|
|
|
@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float[] vectorValue(int targetOrd) throws IOException {
|
public float[] vectorValue(int targetOrd) throws IOException {
|
||||||
slice.seek((long) targetOrd * byteSize);
|
slice.seek((long) targetOrd * byteSize);
|
||||||
|
@ -295,11 +290,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
|
||||||
return doc = NO_MORE_DOCS;
|
return doc = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RandomAccessVectorValues copy() throws IOException {
|
public RandomAccessVectorValues copy() throws IOException {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
|
|
|
@ -280,10 +280,5 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
|
||||||
public int advance(int target) {
|
public int advance(int target) {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return docsWithFieldIter.cost();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,11 +48,6 @@ public abstract class FilterVectorValues extends VectorValues {
|
||||||
return in.advance(target);
|
return in.advance(target);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return in.cost();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int dimension() {
|
public int dimension() {
|
||||||
return in.dimension();
|
return in.dimension();
|
||||||
|
|
|
@ -39,14 +39,17 @@ public abstract class VectorValues extends DocIdSetIterator {
|
||||||
public abstract int dimension();
|
public abstract int dimension();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO: should we use cost() for this? We rely on its always being exactly the number of
|
* Return the number of vectors for this field.
|
||||||
* documents having a value for this field, which is not guaranteed by the cost() contract, but in
|
|
||||||
* all the implementations so far they are the same.
|
|
||||||
*
|
*
|
||||||
* @return the number of vectors returned by this iterator
|
* @return the number of vectors returned by this iterator
|
||||||
*/
|
*/
|
||||||
public abstract int size();
|
public abstract int size();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final long cost() {
|
||||||
|
return size();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the vector value for the current document ID. It is illegal to call this method when the
|
* Return the vector value for the current document ID. It is illegal to call this method when the
|
||||||
* iterator is not positioned: before advancing, or after failing to advance. The returned array
|
* iterator is not positioned: before advancing, or after failing to advance. The returned array
|
||||||
|
@ -127,10 +130,5 @@ public abstract class VectorValues extends DocIdSetIterator {
|
||||||
public int advance(int target) throws IOException {
|
public int advance(int target) throws IOException {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return size();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,9 +122,4 @@ class MockVectorValues extends VectorValues implements RandomAccessVectorValues
|
||||||
}
|
}
|
||||||
return NO_MORE_DOCS;
|
return NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return size();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -767,11 +767,6 @@ public class TestHnswGraph extends LuceneTestCase {
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public long cost() {
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float[] vectorValue(int ord) {
|
public float[] vectorValue(int ord) {
|
||||||
return unitVector2d(ord / (double) size, value);
|
return unitVector2d(ord / (double) size, value);
|
||||||
|
|
Loading…
Reference in New Issue