Remove deprecated float vector classes and methods (#12107)

Follow-up of #12105 to remove the deprecated classes for the next major version.

Removes KnnVectorField, KnnVectorQuery, VectorValues and LeafReader#getVectorValues.
This commit is contained in:
Luca Cavanna 2023-01-24 16:25:36 +01:00 committed by GitHub
parent ce8eaf138c
commit 95e2cfcc1e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 32 additions and 265 deletions

View File

@ -44,6 +44,9 @@ API Changes
* GITHUB#11814: Support deletions in IndexRearranger. (Stefan Vodita) * GITHUB#11814: Support deletions in IndexRearranger. (Stefan Vodita)
* GITHUB#12107: Remove deprecated KnnVectorField, KnnVectorQuery, VectorValues and
LeafReader#getVectorValues. (Luca Cavanna)
New Features New Features
--------------------- ---------------------

View File

@ -60,7 +60,7 @@ public final class Lucene90HnswGraphBuilder {
private final RandomAccessVectorValues<float[]> buildVectors; private final RandomAccessVectorValues<float[]> buildVectors;
/** /**
* Reads all the vectors from a VectorValues, builds a graph connecting them by their dense * Reads all the vectors from vector values, builds a graph connecting them by their dense
* ordinals, using the given hyperparameter settings, and returns the resulting graph. * ordinals, using the given hyperparameter settings, and returns the resulting graph.
* *
* @param vectors the vectors whose relations are represented by the graph - must provide a * @param vectors the vectors whose relations are represented by the graph - must provide a
@ -96,8 +96,8 @@ public final class Lucene90HnswGraphBuilder {
} }
/** /**
* Reads all the vectors from two copies of a random access VectorValues. Providing two copies * Reads all the vectors from two copies of a {@link RandomAccessVectorValues}. Providing two
* enables efficient retrieval without extra data copying, while avoiding collision of the * copies enables efficient retrieval without extra data copying, while avoiding collision of the
* returned values. * returned values.
* *
* @param vectors the vectors for which to build a nearest neighbors graph. Must be an independent * @param vectors the vectors for which to build a nearest neighbors graph. Must be an independent

View File

@ -67,7 +67,7 @@ public final class Lucene91HnswGraphBuilder {
private RandomAccessVectorValues<float[]> buildVectors; private RandomAccessVectorValues<float[]> buildVectors;
/** /**
* Reads all the vectors from a VectorValues, builds a graph connecting them by their dense * Reads all the vectors from vector values, builds a graph connecting them by their dense
* ordinals, using the given hyperparameter settings, and returns the resulting graph. * ordinals, using the given hyperparameter settings, and returns the resulting graph.
* *
* @param vectors the vectors whose relations are represented by the graph - must provide a * @param vectors the vectors whose relations are represented by the graph - must provide a
@ -112,8 +112,8 @@ public final class Lucene91HnswGraphBuilder {
} }
/** /**
* Reads all the vectors from two copies of a random access VectorValues. Providing two copies * Reads all the vectors from two copies of a {@link RandomAccessVectorValues}. Providing two
* enables efficient retrieval without extra data copying, while avoiding collision of the * copies enables efficient retrieval without extra data copying, while avoiding collision of the
* returned values. * returned values.
* *
* @param vectors the vectors for which to build a nearest neighbors graph. Must be an independent * @param vectors the vectors for which to build a nearest neighbors graph. Must be an independent

View File

@ -105,7 +105,7 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
} }
} }
/** Sorting VectorValues that iterate over documents in the order of the provided sortMap */ /** Sorting FloatVectorValues that iterate over documents in the order of the provided sortMap */
private static class SortingVectorValues extends FloatVectorValues { private static class SortingVectorValues extends FloatVectorValues {
private final BufferedVectorValues randomAccess; private final BufferedVectorValues randomAccess;
private final int[] docIdOffsets; private final int[] docIdOffsets;

View File

@ -84,7 +84,7 @@ public abstract class KnnVectorsFormat implements NamedSPILoader.NamedSPI {
new KnnVectorsFormat("EMPTY") { new KnnVectorsFormat("EMPTY") {
@Override @Override
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) { public KnnVectorsWriter fieldsWriter(SegmentWriteState state) {
throw new UnsupportedOperationException("Attempt to write EMPTY VectorValues"); throw new UnsupportedOperationException("Attempt to write EMPTY vector values");
} }
@Override @Override

View File

@ -138,7 +138,7 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
} }
} }
/** View over multiple VectorValues supporting iterator-style access via DocIdMerger. */ /** View over multiple vector values supporting iterator-style access via DocIdMerger. */
protected static final class MergedVectorValues { protected static final class MergedVectorValues {
private MergedVectorValues() {} private MergedVectorValues() {}

View File

@ -380,7 +380,7 @@ public class FieldType implements IndexableFieldType {
} }
if (numDimensions > FloatVectorValues.MAX_DIMENSIONS) { if (numDimensions > FloatVectorValues.MAX_DIMENSIONS) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"vector numDimensions must be <= VectorValues.MAX_DIMENSIONS (=" "vector numDimensions must be <= FloatVectorValues.MAX_DIMENSIONS (="
+ FloatVectorValues.MAX_DIMENSIONS + FloatVectorValues.MAX_DIMENSIONS
+ "); got " + "); got "
+ numDimensions); + numDimensions);

View File

@ -30,9 +30,9 @@ import org.apache.lucene.util.VectorUtil;
* an array (of type float[]) whose length is the vector dimension. Values can be retrieved using * an array (of type float[]) whose length is the vector dimension. Values can be retrieved using
* {@link FloatVectorValues}, which is a forward-only docID-based iterator and also offers * {@link FloatVectorValues}, which is a forward-only docID-based iterator and also offers
* random-access by dense ordinal (not docId). {@link VectorSimilarityFunction} may be used to * random-access by dense ordinal (not docId). {@link VectorSimilarityFunction} may be used to
* compare vectors at query time (for example as part of result ranking). A KnnVectorField may be * compare vectors at query time (for example as part of result ranking). A {@link
* associated with a search similarity function defining the metric used for nearest-neighbor search * KnnFloatVectorField} may be associated with a search similarity function defining the metric used
* among vectors of that field. * for nearest-neighbor search among vectors of that field.
* *
* @lucene.experimental * @lucene.experimental
*/ */

View File

@ -1,81 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.VectorUtil;
/**
* A field that contains a single floating-point numeric vector (or none) for each document. Vectors
* are dense - that is, every dimension of a vector contains an explicit value, stored packed into
* an array (of type float[]) whose length is the vector dimension. Values can be retrieved using
* {@link FloatVectorValues}, which is a forward-only docID-based iterator and also offers
* random-access by dense ordinal (not docId). {@link VectorSimilarityFunction} may be used to
* compare vectors at query time (for example as part of result ranking). A KnnVectorField may be
* associated with a search similarity function defining the metric used for nearest-neighbor search
* among vectors of that field.
*
* @deprecated use {@link KnnFloatVectorField} instead
*/
@Deprecated
public class KnnVectorField extends KnnFloatVectorField {
/**
* Creates a numeric vector field with an explicit similarity function. Fields are single-valued:
* each document has either one value or no value. Vectors of a single field share the same
* dimension and similarity function. Note that some vector similarities (like {@link
* VectorSimilarityFunction#DOT_PRODUCT}) require values to be unit-length, which can be enforced
* using {@link VectorUtil#l2normalize(float[])}.
*
* <p>This constructor only forwards its arguments to the matching {@link KnnFloatVectorField}
* constructor, so any argument validation is performed by the superclass.
*
* @param name field name
* @param vector value
* @param similarityFunction a function defining vector proximity.
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
* dimension &gt; 1024. NOTE(review): 1024 is presumably the value of {@code
* FloatVectorValues.MAX_DIMENSIONS}; confirm before relying on the literal.
*/
public KnnVectorField(String name, float[] vector, VectorSimilarityFunction similarityFunction) {
super(name, vector, similarityFunction);
}
/**
* Creates a numeric vector field with the default similarity, i.e. whatever the two-argument
* {@link KnnFloatVectorField} constructor applies. This was documented as EUCLIDEAN_HNSW (L2);
* NOTE(review): confirm that constant name against {@link VectorSimilarityFunction}. Fields are
* single-valued: each document has either one value or no value. Vectors of a single field share
* the same dimension and similarity function.
*
* <p>This constructor only forwards its arguments to the matching {@link KnnFloatVectorField}
* constructor.
*
* @param name field name
* @param vector value
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
* dimension &gt; 1024.
*/
public KnnVectorField(String name, float[] vector) {
super(name, vector);
}
/**
* Creates a numeric vector field configured by a caller-supplied field type. Fields are
* single-valued: each document has either one value or no value. Vectors of a single field share
* the same dimension and similarity function.
*
* <p>This constructor only forwards its arguments to the matching {@link KnnFloatVectorField}
* constructor.
*
* @param name field name
* @param vector value
* @param fieldType field type
* @throws IllegalArgumentException if any parameter is null, or the vector is empty or has
* dimension &gt; 1024.
*/
public KnnVectorField(String name, float[] vector, FieldType fieldType) {
super(name, vector, fieldType);
}
}

View File

@ -377,7 +377,7 @@ public final class CheckIndex implements Closeable {
public Throwable error; public Throwable error;
} }
/** Status from testing VectorValues */ /** Status from testing vector values */
public static final class VectorValuesStatus { public static final class VectorValuesStatus {
VectorValuesStatus() {} VectorValuesStatus() {}
@ -1015,7 +1015,7 @@ public final class CheckIndex implements Closeable {
// Test PointValues // Test PointValues
segInfoStat.pointsStatus = testPoints(reader, infoStream, failFast); segInfoStat.pointsStatus = testPoints(reader, infoStream, failFast);
// Test VectorValues // Test FloatVectorValues and ByteVectorValues
segInfoStat.vectorValuesStatus = testVectors(reader, infoStream, failFast); segInfoStat.vectorValuesStatus = testVectors(reader, infoStream, failFast);
// Test Index Sort // Test Index Sort

View File

@ -454,11 +454,11 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
throw new ExitingReaderException( throw new ExitingReaderException(
"The request took too long to iterate over vector values. Timeout: " "The request took too long to iterate over vector values. Timeout: "
+ queryTimeout.toString() + queryTimeout.toString()
+ ", VectorValues=" + ", FloatVectorValues="
+ in); + in);
} else if (Thread.interrupted()) { } else if (Thread.interrupted()) {
throw new ExitingReaderException( throw new ExitingReaderException(
"Interrupted while iterating over vector values. VectorValues=" + in); "Interrupted while iterating over vector values. FloatVectorValues=" + in);
} }
} }
} }
@ -521,11 +521,11 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
throw new ExitingReaderException( throw new ExitingReaderException(
"The request took too long to iterate over vector values. Timeout: " "The request took too long to iterate over vector values. Timeout: "
+ queryTimeout.toString() + queryTimeout.toString()
+ ", VectorValues=" + ", ByteVectorValues="
+ in); + in);
} else if (Thread.interrupted()) { } else if (Thread.interrupted()) {
throw new ExitingReaderException( throw new ExitingReaderException(
"Interrupted while iterating over vector values. VectorValues=" + in); "Interrupted while iterating over vector values. ByteVectorValues=" + in);
} }
} }
} }

View File

@ -244,7 +244,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
return hasPointValues; return hasPointValues;
} }
/** Returns true if any fields have VectorValues */ /** Returns true if any fields have vector values */
public boolean hasVectorValues() { public boolean hasVectorValues() {
return hasVectorValues; return hasVectorValues;
} }

View File

@ -1,64 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Objects;
/**
 * Base class for {@link VectorValues} implementations that wrap a {@link FloatVectorValues}
 * instance and forward every call to it unchanged.
 */
public abstract class FilterVectorValues extends VectorValues {

  /** The wrapped instance that every method of this class forwards to. */
  protected final FloatVectorValues in;

  /**
   * Sole constructor.
   *
   * @param in the values to wrap
   * @throws NullPointerException if {@code in} is null
   */
  protected FilterVectorValues(FloatVectorValues in) {
    // requireNonNull hands back its argument, so the null check and the assignment can be fused.
    this.in = Objects.requireNonNull(in);
  }

  /** Forwards to the wrapped instance's {@code docID()}. */
  @Override
  public int docID() {
    return in.docID();
  }

  /** Forwards to the wrapped instance's {@code nextDoc()}. */
  @Override
  public int nextDoc() throws IOException {
    return in.nextDoc();
  }

  /** Forwards to the wrapped instance's {@code advance(int)} with the same target. */
  @Override
  public int advance(int target) throws IOException {
    return in.advance(target);
  }

  /** Forwards to the wrapped instance's {@code dimension()}. */
  @Override
  public int dimension() {
    return in.dimension();
  }

  /** Forwards to the wrapped instance's {@code size()}. */
  @Override
  public int size() {
    return in.size();
  }

  /** Forwards to the wrapped instance's {@code vectorValue()}. */
  @Override
  public float[] vectorValue() throws IOException {
    return in.vectorValue();
  }
}

View File

@ -199,17 +199,6 @@ public abstract class LeafReader extends IndexReader {
*/ */
public abstract NumericDocValues getNormValues(String field) throws IOException; public abstract NumericDocValues getNormValues(String field) throws IOException;
/**
 * Returns {@link VectorValues} for this field, or null if no {@link VectorValues} were indexed.
 * The returned instance should only be used by a single thread.
 *
 * <p>The result is a thin wrapper around {@link #getFloatVectorValues(String)}. The null case is
 * checked before wrapping because the {@link FilterVectorValues} constructor rejects a null
 * delegate, which would otherwise turn the documented "null if nothing was indexed" result into
 * a {@link NullPointerException}.
 *
 * @param field the field name, passed through to {@link #getFloatVectorValues(String)}
 * @return the wrapped values, or null if {@link #getFloatVectorValues(String)} returned null
 * @throws IOException if {@link #getFloatVectorValues(String)} throws it
 * @deprecated use {@link #getFloatVectorValues(String)} instead
 */
@Deprecated
public VectorValues getVectorValues(String field) throws IOException {
  FloatVectorValues floatValues = getFloatVectorValues(field);
  if (floatValues == null) {
    return null;
  }
  return new FilterVectorValues(floatValues) {};
}
/** /**
* Returns {@link FloatVectorValues} for this field, or null if no {@link FloatVectorValues} were * Returns {@link FloatVectorValues} for this field, or null if no {@link FloatVectorValues} were
* indexed. The returned instance should only be used by a single thread. * indexed. The returned instance should only be used by a single thread.

View File

@ -212,7 +212,7 @@ public final class SortingCodecReader extends FilterCodecReader {
} }
} }
/** Sorting VectorValues that iterate over documents in the order of the provided sortMap */ /** Sorting FloatVectorValues that iterate over documents in the order of the provided sortMap */
private static class SortingFloatVectorValues extends FloatVectorValues { private static class SortingFloatVectorValues extends FloatVectorValues {
final int size; final int size;
final int dimension; final int dimension;

View File

@ -1,32 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import org.apache.lucene.document.KnnFloatVectorField;
/**
* This class provides access to per-document floating point vector values indexed as {@link
* KnnFloatVectorField}.
*
* @deprecated use {@link FloatVectorValues} instead
*/
@Deprecated
public abstract class VectorValues extends FloatVectorValues {
/**
* Sole constructor. It is package-private, so this class can only be subclassed from within
* {@code org.apache.lucene.index}.
*/
VectorValues() {}
}

View File

@ -239,7 +239,7 @@ abstract class AbstractKnnVectorQuery extends Query {
} }
/** /**
* @return the KnnVectorField where the KnnVector search happens. * @return the knn vector field where the knn vector search happens.
*/ */
public String getField() { public String getField() {
return field; return field;

View File

@ -1,47 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.util.Bits;
/**
* Uses {@link KnnVectorsReader#search(String, float[], int, Bits, int)} to perform nearest
* neighbour search.
*
* <p>This query also allows for performing a kNN search subject to a filter. In this case, it first
* executes the filter for each leaf, then chooses a strategy dynamically:
*
* <ul>
* <li>If the filter cost is less than k, just execute an exact search
* <li>Otherwise run a kNN search subject to the filter
* <li>If the kNN search visits too many vectors without completing, stop and run an exact search
* </ul>
*
* @deprecated use {@link KnnFloatVectorQuery} instead
*/
@Deprecated
public class KnnVectorQuery extends KnnFloatVectorQuery {
/**
* Creates a query without a filter by forwarding all arguments to the matching {@link
* KnnFloatVectorQuery} constructor.
*
* @param field passed through unchanged; presumably the name of the vector field to search
* @param target passed through unchanged; presumably the query vector
* @param k passed through unchanged; presumably the number of nearest neighbours to collect
*/
public KnnVectorQuery(String field, float[] target, int k) {
super(field, target, k);
}
/**
* Creates a query restricted by a filter by forwarding all arguments to the matching {@link
* KnnFloatVectorQuery} constructor.
*
* @param field passed through unchanged; presumably the name of the vector field to search
* @param target passed through unchanged; presumably the query vector
* @param k passed through unchanged; presumably the number of nearest neighbours to collect
* @param filter passed through unchanged; presumably the query limiting which documents may
* match. NOTE(review): whether null is accepted is decided by the superclass; confirm there.
*/
public KnnVectorQuery(String field, float[] target, int k, Query filter) {
super(field, target, k, filter);
}
}

View File

@ -76,7 +76,7 @@ public final class HnswGraphBuilder<T> {
} }
/** /**
* Reads all the vectors from a VectorValues, builds a graph connecting them by their dense * Reads all the vectors from vector values, builds a graph connecting them by their dense
* ordinals, using the given hyperparameter settings, and returns the resulting graph. * ordinals, using the given hyperparameter settings, and returns the resulting graph.
* *
* @param vectors the vectors whose relations are represented by the graph - must provide a * @param vectors the vectors whose relations are represented by the graph - must provide a
@ -123,8 +123,8 @@ public final class HnswGraphBuilder<T> {
} }
/** /**
* Reads all the vectors from two copies of a random access VectorValues. Providing two copies * Reads all the vectors from two copies of a {@link RandomAccessVectorValues}. Providing two
* enables efficient retrieval without extra data copying, while avoiding collision of the * copies enables efficient retrieval without extra data copying, while avoiding collision of the
* returned values. * returned values.
* *
* @param vectorsToAdd the vectors for which to build a nearest neighbors graph. Must be an * @param vectorsToAdd the vectors for which to build a nearest neighbors graph. Must be an

View File

@ -342,7 +342,7 @@ public class TestKnnGraph extends LuceneTestCase {
* approximate KNN search algorithm * approximate KNN search algorithm
*/ */
assertGraphSearch(new int[] {0, 15, 3, 18, 5}, new float[] {0f, 0.1f}, dr); assertGraphSearch(new int[] {0, 15, 3, 18, 5}, new float[] {0f, 0.1f}, dr);
// Tiebreaking by docid must be done after VectorValues.search. // Tiebreaking by docid must be done after search.
// assertGraphSearch(new int[]{11, 1, 8, 14, 21}, new float[]{2, 2}, dr); // assertGraphSearch(new int[]{11, 1, 8, 14, 21}, new float[]{2, 2}, dr);
assertGraphSearch(new int[] {15, 18, 0, 3, 5}, new float[] {0.3f, 0.8f}, dr); assertGraphSearch(new int[] {15, 18, 0, 3, 5}, new float[] {0.3f, 0.8f}, dr);
} }

View File

@ -48,7 +48,7 @@ import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
/** Test cases for KnnVectorQuery objects. */ /** Test cases for AbstractKnnVectorQuery objects. */
abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase { abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
abstract AbstractKnnVectorQuery getKnnVectorQuery( abstract AbstractKnnVectorQuery getKnnVectorQuery(

View File

@ -56,9 +56,8 @@ class MockVectorValues extends AbstractMockVectorValues<float[]> {
return values[pos]; return values[pos];
} else { } else {
// Sometimes use the same scratch array repeatedly, mimicking what the codec will do. // Sometimes use the same scratch array repeatedly, mimicking what the codec will do.
// This should help us catch cases of aliasing where the same VectorValues source is used // This should help us catch cases of aliasing where the same vector values source is used
// twice in a // twice in a single computation.
// single computation.
System.arraycopy(values[pos], 0, scratch, 0, dimension); System.arraycopy(values[pos], 0, scratch, 0, dimension);
return scratch; return scratch;
} }